paddlex 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240)
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +1 -1
  3. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  4. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  5. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  6. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  7. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  8. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  9. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
  10. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
  11. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
  12. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  13. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  14. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  15. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  16. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  17. paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
  18. paddlex/configs/pipelines/OCR.yaml +7 -6
  19. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
  20. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
  21. paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
  22. paddlex/configs/pipelines/doc_understanding.yaml +1 -1
  23. paddlex/configs/pipelines/formula_recognition.yaml +2 -2
  24. paddlex/configs/pipelines/layout_parsing.yaml +3 -2
  25. paddlex/configs/pipelines/seal_recognition.yaml +1 -0
  26. paddlex/configs/pipelines/table_recognition.yaml +2 -1
  27. paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
  28. paddlex/hpip_links.html +20 -20
  29. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
  30. paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
  31. paddlex/inference/common/result/mixin.py +19 -12
  32. paddlex/inference/models/base/predictor/base_predictor.py +2 -8
  33. paddlex/inference/models/common/static_infer.py +29 -73
  34. paddlex/inference/models/common/tokenizer/__init__.py +2 -0
  35. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
  36. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
  37. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  38. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
  39. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  40. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
  41. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
  42. paddlex/inference/models/common/tokenizer/vocab.py +7 -7
  43. paddlex/inference/models/common/ts/funcs.py +19 -8
  44. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  45. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  46. paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
  47. paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
  48. paddlex/inference/models/common/vlm/generation/utils.py +1 -1
  49. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
  50. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
  51. paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
  52. paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
  53. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  54. paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
  55. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  56. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  57. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
  58. paddlex/inference/models/doc_vlm/predictor.py +79 -24
  59. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  60. paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
  61. paddlex/inference/models/doc_vlm/processors/common.py +189 -0
  62. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  63. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
  64. paddlex/inference/models/formula_recognition/predictor.py +8 -2
  65. paddlex/inference/models/formula_recognition/processors.py +90 -77
  66. paddlex/inference/models/formula_recognition/result.py +28 -27
  67. paddlex/inference/models/image_feature/processors.py +3 -4
  68. paddlex/inference/models/keypoint_detection/predictor.py +3 -0
  69. paddlex/inference/models/object_detection/predictor.py +2 -0
  70. paddlex/inference/models/object_detection/processors.py +28 -3
  71. paddlex/inference/models/object_detection/utils.py +2 -0
  72. paddlex/inference/models/table_structure_recognition/result.py +0 -10
  73. paddlex/inference/models/text_detection/predictor.py +8 -0
  74. paddlex/inference/models/text_detection/processors.py +44 -10
  75. paddlex/inference/models/text_detection/result.py +0 -10
  76. paddlex/inference/models/text_recognition/result.py +1 -1
  77. paddlex/inference/pipelines/__init__.py +9 -5
  78. paddlex/inference/pipelines/_parallel.py +172 -0
  79. paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
  80. paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
  81. paddlex/inference/pipelines/base.py +14 -4
  82. paddlex/inference/pipelines/components/faisser.py +1 -1
  83. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
  84. paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
  85. paddlex/inference/pipelines/formula_recognition/result.py +1 -11
  86. paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
  87. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
  88. paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
  89. paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
  90. paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
  91. paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
  92. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +832 -260
  93. paddlex/inference/pipelines/layout_parsing/result.py +4 -17
  94. paddlex/inference/pipelines/layout_parsing/result_v2.py +259 -245
  95. paddlex/inference/pipelines/layout_parsing/setting.py +88 -0
  96. paddlex/inference/pipelines/layout_parsing/utils.py +391 -2028
  97. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  98. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1199 -0
  99. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +615 -0
  100. paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
  101. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
  102. paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
  103. paddlex/inference/pipelines/ocr/pipeline.py +127 -70
  104. paddlex/inference/pipelines/ocr/result.py +21 -18
  105. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
  106. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
  107. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
  108. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
  109. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +6 -6
  110. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
  111. paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
  112. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
  113. paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
  114. paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
  115. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
  116. paddlex/inference/pipelines/table_recognition/result.py +1 -1
  117. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
  118. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
  119. paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
  120. paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
  121. paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
  122. paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
  123. paddlex/inference/serving/basic_serving/_app.py +46 -13
  124. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
  125. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
  126. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
  127. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
  128. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
  129. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
  130. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
  131. paddlex/inference/serving/infra/utils.py +20 -22
  132. paddlex/inference/serving/schemas/formula_recognition.py +1 -1
  133. paddlex/inference/serving/schemas/layout_parsing.py +1 -2
  134. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
  135. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
  136. paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
  137. paddlex/inference/serving/schemas/seal_recognition.py +1 -1
  138. paddlex/inference/serving/schemas/table_recognition.py +2 -6
  139. paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
  140. paddlex/inference/utils/hpi.py +30 -16
  141. paddlex/inference/utils/hpi_model_info_collection.json +666 -162
  142. paddlex/inference/utils/io/readers.py +12 -12
  143. paddlex/inference/utils/misc.py +20 -0
  144. paddlex/inference/utils/mkldnn_blocklist.py +59 -0
  145. paddlex/inference/utils/official_models.py +140 -5
  146. paddlex/inference/utils/pp_option.py +74 -9
  147. paddlex/model.py +2 -2
  148. paddlex/modules/__init__.py +1 -1
  149. paddlex/modules/anomaly_detection/evaluator.py +2 -2
  150. paddlex/modules/base/__init__.py +1 -1
  151. paddlex/modules/base/evaluator.py +5 -5
  152. paddlex/modules/base/trainer.py +1 -1
  153. paddlex/modules/doc_vlm/dataset_checker.py +2 -2
  154. paddlex/modules/doc_vlm/evaluator.py +2 -2
  155. paddlex/modules/doc_vlm/exportor.py +2 -2
  156. paddlex/modules/doc_vlm/model_list.py +1 -1
  157. paddlex/modules/doc_vlm/trainer.py +2 -2
  158. paddlex/modules/face_recognition/evaluator.py +2 -2
  159. paddlex/modules/formula_recognition/evaluator.py +5 -2
  160. paddlex/modules/formula_recognition/model_list.py +3 -0
  161. paddlex/modules/formula_recognition/trainer.py +3 -0
  162. paddlex/modules/general_recognition/evaluator.py +1 -1
  163. paddlex/modules/image_classification/evaluator.py +2 -2
  164. paddlex/modules/image_classification/model_list.py +1 -0
  165. paddlex/modules/instance_segmentation/evaluator.py +1 -1
  166. paddlex/modules/keypoint_detection/evaluator.py +1 -1
  167. paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
  168. paddlex/modules/multilabel_classification/evaluator.py +2 -2
  169. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
  170. paddlex/modules/object_detection/evaluator.py +2 -2
  171. paddlex/modules/object_detection/model_list.py +2 -0
  172. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
  173. paddlex/modules/semantic_segmentation/evaluator.py +2 -2
  174. paddlex/modules/table_recognition/evaluator.py +2 -2
  175. paddlex/modules/text_detection/evaluator.py +2 -2
  176. paddlex/modules/text_detection/model_list.py +2 -0
  177. paddlex/modules/text_recognition/evaluator.py +2 -2
  178. paddlex/modules/text_recognition/model_list.py +2 -0
  179. paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
  180. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  181. paddlex/modules/ts_classification/evaluator.py +2 -2
  182. paddlex/modules/ts_forecast/evaluator.py +2 -2
  183. paddlex/modules/video_classification/evaluator.py +2 -2
  184. paddlex/modules/video_detection/evaluator.py +2 -2
  185. paddlex/ops/__init__.py +8 -5
  186. paddlex/paddlex_cli.py +19 -13
  187. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
  188. paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
  189. paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
  190. paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
  191. paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
  192. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
  193. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
  194. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
  195. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
  196. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
  197. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
  198. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
  199. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
  200. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
  201. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
  202. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
  203. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
  204. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
  205. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
  206. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
  207. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
  208. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
  209. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
  210. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
  211. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
  212. paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
  213. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
  214. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
  215. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
  216. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
  217. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
  218. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
  219. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
  220. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
  221. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
  222. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
  223. paddlex/repo_apis/base/config.py +1 -1
  224. paddlex/repo_manager/core.py +3 -3
  225. paddlex/repo_manager/meta.py +6 -2
  226. paddlex/repo_manager/repo.py +17 -16
  227. paddlex/utils/custom_device_list.py +26 -2
  228. paddlex/utils/deps.py +3 -3
  229. paddlex/utils/device.py +5 -13
  230. paddlex/utils/env.py +4 -0
  231. paddlex/utils/flags.py +11 -4
  232. paddlex/utils/fonts/__init__.py +34 -4
  233. paddlex/utils/misc.py +1 -1
  234. paddlex/utils/subclass_register.py +2 -2
  235. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/METADATA +349 -208
  236. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/RECORD +240 -211
  237. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/WHEEL +1 -1
  238. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/entry_points.txt +1 -0
  239. {paddlex-3.0.0rc1.dist-info/licenses → paddlex-3.0.2.dist-info}/LICENSE +0 -0
  240. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import copy
16
- from pathlib import Path
17
16
  from typing import Dict
18
17
 
19
18
  import numpy as np
@@ -31,15 +30,6 @@ class LayoutParsingResult(BaseCVResult, HtmlMixin, XlsxMixin):
31
30
  HtmlMixin.__init__(self)
32
31
  XlsxMixin.__init__(self)
33
32
 
34
- def _get_input_fn(self):
35
- fn = super()._get_input_fn()
36
- if (page_idx := self["page_index"]) is not None:
37
- fp = Path(fn)
38
- stem, suffix = fp.stem, fp.suffix
39
- return f"{stem}_{page_idx}{suffix}"
40
- else:
41
- return fn
42
-
43
33
  def _to_img(self) -> Dict[str, np.ndarray]:
44
34
  res_img_dict = {}
45
35
  model_settings = self["model_settings"]
@@ -47,12 +37,11 @@ class LayoutParsingResult(BaseCVResult, HtmlMixin, XlsxMixin):
47
37
  res_img_dict.update(**self["doc_preprocessor_res"].img)
48
38
  res_img_dict["layout_det_res"] = self["layout_det_res"].img["res"]
49
39
 
50
- if model_settings["use_general_ocr"] or model_settings["use_table_recognition"]:
51
- res_img_dict["overall_ocr_res"] = self["overall_ocr_res"].img["ocr_res_img"]
40
+ res_img_dict["overall_ocr_res"] = self["overall_ocr_res"].img["ocr_res_img"]
52
41
 
53
42
  if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
54
43
  table_cell_img = Image.fromarray(
55
- copy.deepcopy(self["doc_preprocessor_res"]["output_img"])
44
+ copy.deepcopy(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
56
45
  )
57
46
  table_draw = ImageDraw.Draw(table_cell_img)
58
47
  rectangle_color = (255, 0, 0)
@@ -106,8 +95,7 @@ class LayoutParsingResult(BaseCVResult, HtmlMixin, XlsxMixin):
106
95
  if self["model_settings"]["use_doc_preprocessor"]:
107
96
  data["doc_preprocessor_res"] = self["doc_preprocessor_res"].str["res"]
108
97
  data["layout_det_res"] = self["layout_det_res"].str["res"]
109
- if model_settings["use_general_ocr"] or model_settings["use_table_recognition"]:
110
- data["overall_ocr_res"] = self["overall_ocr_res"].str["res"]
98
+ data["overall_ocr_res"] = self["overall_ocr_res"].str["res"]
111
99
  if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
112
100
  data["table_res_list"] = []
113
101
  for sno in range(len(self["table_res_list"])):
@@ -149,8 +137,7 @@ class LayoutParsingResult(BaseCVResult, HtmlMixin, XlsxMixin):
149
137
  if self["model_settings"]["use_doc_preprocessor"]:
150
138
  data["doc_preprocessor_res"] = self["doc_preprocessor_res"].json["res"]
151
139
  data["layout_det_res"] = self["layout_det_res"].json["res"]
152
- if model_settings["use_general_ocr"] or model_settings["use_table_recognition"]:
153
- data["overall_ocr_res"] = self["overall_ocr_res"].json["res"]
140
+ data["overall_ocr_res"] = self["overall_ocr_res"].json["res"]
154
141
  if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
155
142
  data["table_res_list"] = []
156
143
  for sno in range(len(self["table_res_list"])):
@@ -15,11 +15,13 @@ from __future__ import annotations
15
15
 
16
16
  import copy
17
17
  import re
18
- from pathlib import Path
18
+ from functools import partial
19
+ from typing import List
19
20
 
20
21
  import numpy as np
21
- from PIL import Image, ImageDraw
22
+ from PIL import Image, ImageDraw, ImageFont
22
23
 
24
+ from ....utils.fonts import PINGFANG_FONT_FILE_PATH
23
25
  from ...common.result import (
24
26
  BaseCVResult,
25
27
  HtmlMixin,
@@ -27,7 +29,115 @@ from ...common.result import (
27
29
  MarkdownMixin,
28
30
  XlsxMixin,
29
31
  )
30
- from .utils import get_show_color
32
+ from .layout_objects import LayoutBlock
33
+ from .utils import get_seg_flag
34
+
35
+
36
+ def compile_title_pattern():
37
+ # Precompiled regex pattern for matching numbering at the beginning of the title
38
+ numbering_pattern = (
39
+ r"(?:" + r"[1-9][0-9]*(?:\.[1-9][0-9]*)*[\.、]?|" + r"[\(\(](?:[1-9][0-9]*|["
40
+ r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+)[\)\)]|" + r"["
41
+ r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+"
42
+ r"[、\.]?|" + r"(?:I|II|III|IV|V|VI|VII|VIII|IX|X)\.?" + r")"
43
+ )
44
+ return re.compile(r"^\s*(" + numbering_pattern + r")(\s*)(.*)$")
45
+
46
+
47
+ TITLE_RE_PATTERN = compile_title_pattern()
48
+
49
+
50
+ def format_title_func(block):
51
+ """
52
+ Normalize chapter title.
53
+ Add the '#' to indicate the level of the title.
54
+ If numbering exists, ensure there's exactly one space between it and the title content.
55
+ If numbering does not exist, return the original title unchanged.
56
+
57
+ :param title: Original chapter title string.
58
+ :return: Normalized chapter title string.
59
+ """
60
+ title = block.content
61
+ match = TITLE_RE_PATTERN.match(title)
62
+ if match:
63
+ numbering = match.group(1).strip()
64
+ title_content = match.group(3).lstrip()
65
+ # Return numbering and title content separated by one space
66
+ title = numbering + " " + title_content
67
+
68
+ title = title.rstrip(".")
69
+ level = (
70
+ title.count(
71
+ ".",
72
+ )
73
+ + 1
74
+ if "." in title
75
+ else 1
76
+ )
77
+ return f"#{'#' * level} {title}".replace("-\n", "").replace(
78
+ "\n",
79
+ " ",
80
+ )
81
+
82
+
83
+ def format_centered_by_html(string):
84
+ return (
85
+ f'<div style="text-align: center;">{string}</div>'.replace(
86
+ "-\n",
87
+ "",
88
+ ).replace("\n", " ")
89
+ + "\n"
90
+ )
91
+
92
+
93
+ def format_text_plain_func(block):
94
+ return block.content
95
+
96
+
97
+ def format_image_scaled_by_html_func(block, original_image_width):
98
+ img_tags = []
99
+ image_path = block.image["path"]
100
+ image_width = block.image["img"].width
101
+ scale = int(image_width / original_image_width * 100)
102
+ img_tags.append(
103
+ '<img src="{}" alt="Image" width="{}%" />'.format(
104
+ image_path.replace("-\n", "").replace("\n", " "), scale
105
+ ),
106
+ )
107
+ return "\n".join(img_tags)
108
+
109
+
110
+ def format_image_plain_func(block):
111
+ img_tags = []
112
+ image_path = block.image["path"]
113
+ img_tags.append("![]({})".format(image_path.replace("-\n", "").replace("\n", " ")))
114
+ return "\n".join(img_tags)
115
+
116
+
117
+ def format_chart2table_func(block):
118
+ lines_list = block.content.split("\n")
119
+ column_num = len(lines_list[0].split("|"))
120
+ lines_list.insert(1, "|".join(["---"] * column_num))
121
+ lines_list = [f"|{line}|" for line in lines_list]
122
+ return "\n".join(lines_list)
123
+
124
+
125
+ def simplify_table_func(table_code):
126
+ return "\n" + table_code.replace("<html>", "").replace("</html>", "").replace(
127
+ "<body>", ""
128
+ ).replace("</body>", "")
129
+
130
+
131
+ def format_first_line_func(block, templates, format_func, spliter):
132
+ lines = block.content.split(spliter)
133
+ for idx in range(len(lines)):
134
+ line = lines[idx]
135
+ if line.strip() == "":
136
+ continue
137
+ if line.lower() in templates:
138
+ lines[idx] = format_func(line)
139
+ break
140
+ return spliter.join(lines)
31
141
 
32
142
 
33
143
  class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
@@ -40,30 +150,10 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
40
150
  XlsxMixin.__init__(self)
41
151
  MarkdownMixin.__init__(self)
42
152
  JsonMixin.__init__(self)
43
- self.title_pattern = self._build_title_pattern()
44
-
45
- def _build_title_pattern(self):
46
- # Precompiled regex pattern for matching numbering at the beginning of the title
47
- numbering_pattern = (
48
- r"(?:"
49
- + r"[1-9][0-9]*(?:\.[1-9][0-9]*)*[\.、]?|"
50
- + r"[\(\(](?:[1-9][0-9]*|["
51
- r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+)[\)\)]|" + r"["
52
- r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+"
53
- r"[、\.]?|" + r"(?:I|II|III|IV|V|VI|VII|VIII|IX|X)\.?" + r")"
54
- )
55
- return re.compile(r"^\s*(" + numbering_pattern + r")(\s*)(.*)$")
56
-
57
- def _get_input_fn(self):
58
- fn = super()._get_input_fn()
59
- if (page_idx := self["page_index"]) is not None:
60
- fp = Path(fn)
61
- stem, suffix = fp.stem, fp.suffix
62
- return f"{stem}_{page_idx}{suffix}"
63
- else:
64
- return fn
65
153
 
66
154
  def _to_img(self) -> dict[str, np.ndarray]:
155
+ from .utils import get_show_color
156
+
67
157
  res_img_dict = {}
68
158
  model_settings = self["model_settings"]
69
159
  if model_settings["use_doc_preprocessor"]:
@@ -71,12 +161,14 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
71
161
  res_img_dict[key] = value
72
162
  res_img_dict["layout_det_res"] = self["layout_det_res"].img["res"]
73
163
 
74
- if model_settings["use_general_ocr"] or model_settings["use_table_recognition"]:
75
- res_img_dict["overall_ocr_res"] = self["overall_ocr_res"].img["ocr_res_img"]
164
+ if model_settings["use_region_detection"]:
165
+ res_img_dict["region_det_res"] = self["region_det_res"].img["res"]
166
+
167
+ res_img_dict["overall_ocr_res"] = self["overall_ocr_res"].img["ocr_res_img"]
76
168
 
77
169
  if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
78
170
  table_cell_img = Image.fromarray(
79
- copy.deepcopy(self["doc_preprocessor_res"]["output_img"])
171
+ copy.deepcopy(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
80
172
  )
81
173
  table_draw = ImageDraw.Draw(table_cell_img)
82
174
  rectangle_color = (255, 0, 0)
@@ -101,16 +193,23 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
101
193
  # for layout ordering image
102
194
  image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
103
195
  draw = ImageDraw.Draw(image, "RGBA")
104
- parsing_result = self["parsing_res_list"]
196
+ font_size = int(0.018 * int(image.width)) + 2
197
+ font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
198
+ parsing_result: List[LayoutBlock] = self["parsing_res_list"]
105
199
  for block in parsing_result:
106
- bbox = block["block_bbox"]
107
- index = block.get("index", None)
108
- label = block["sub_label"]
109
- fill_color = get_show_color(label)
200
+ bbox = block.bbox
201
+ index = block.order_index
202
+ label = block.label
203
+ fill_color = get_show_color(label, False)
110
204
  draw.rectangle(bbox, fill=fill_color)
111
205
  if index is not None:
112
- text_position = (bbox[2] + 2, bbox[1] - 10)
113
- draw.text(text_position, str(index), fill="red")
206
+ text_position = (bbox[2] + 2, bbox[1] - font_size // 2)
207
+ if int(image.width) - bbox[2] < font_size:
208
+ text_position = (
209
+ int(bbox[2] - font_size * 1.1),
210
+ bbox[1] - font_size // 2,
211
+ )
212
+ draw.text(text_position, str(index), font=font, fill="red")
114
213
 
115
214
  res_img_dict["layout_order_res"] = image
116
215
 
@@ -134,8 +233,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
134
233
  if self["model_settings"]["use_doc_preprocessor"]:
135
234
  data["doc_preprocessor_res"] = self["doc_preprocessor_res"].str["res"]
136
235
  data["layout_det_res"] = self["layout_det_res"].str["res"]
137
- if model_settings["use_general_ocr"] or model_settings["use_table_recognition"]:
138
- data["overall_ocr_res"] = self["overall_ocr_res"].str["res"]
236
+ data["overall_ocr_res"] = self["overall_ocr_res"].str["res"]
139
237
  if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
140
238
  data["table_res_list"] = []
141
239
  for sno in range(len(self["table_res_list"])):
@@ -176,9 +274,9 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
176
274
  parsing_res_list = self["parsing_res_list"]
177
275
  parsing_res_list = [
178
276
  {
179
- "block_label": parsing_res["block_label"],
180
- "block_content": parsing_res["block_content"],
181
- "block_bbox": parsing_res["block_bbox"],
277
+ "block_label": parsing_res.label,
278
+ "block_content": parsing_res.content,
279
+ "block_bbox": parsing_res.bbox,
182
280
  }
183
281
  for parsing_res in parsing_res_list
184
282
  ]
@@ -186,8 +284,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
186
284
  if self["model_settings"]["use_doc_preprocessor"]:
187
285
  data["doc_preprocessor_res"] = self["doc_preprocessor_res"].json["res"]
188
286
  data["layout_det_res"] = self["layout_det_res"].json["res"]
189
- if model_settings["use_general_ocr"] or model_settings["use_table_recognition"]:
190
- data["overall_ocr_res"] = self["overall_ocr_res"].json["res"]
287
+ data["overall_ocr_res"] = self["overall_ocr_res"].json["res"]
191
288
  if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
192
289
  data["table_res_list"] = []
193
290
  for sno in range(len(self["table_res_list"])):
@@ -240,227 +337,144 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
240
337
  res_xlsx_dict[key] = table_res.xlsx["pred"]
241
338
  return res_xlsx_dict
242
339
 
243
- def _to_markdown(self) -> dict:
340
+ def _to_markdown(self, pretty=True) -> dict:
244
341
  """
245
342
  Save the parsing result to a Markdown file.
246
343
 
344
+ Args:
345
+ pretty (Optional[bool]): whether to pretty markdown by HTML, default by True.
346
+
247
347
  Returns:
248
348
  Dict
249
349
  """
350
+ original_image_width = self["doc_preprocessor_res"]["output_img"].shape[1]
250
351
 
251
- def _format_data(obj):
252
-
253
- def format_title(title):
254
- """
255
- Normalize chapter title.
256
- Add the '#' to indicate the level of the title.
257
- If numbering exists, ensure there's exactly one space between it and the title content.
258
- If numbering does not exist, return the original title unchanged.
259
-
260
- :param title: Original chapter title string.
261
- :return: Normalized chapter title string.
262
- """
263
- match = self.title_pattern.match(title)
264
- if match:
265
- numbering = match.group(1).strip()
266
- title_content = match.group(3).lstrip()
267
- # Return numbering and title content separated by one space
268
- title = numbering + " " + title_content
269
-
270
- title = title.rstrip(".")
271
- level = (
272
- title.count(
273
- ".",
274
- )
275
- + 1
276
- if "." in title
277
- else 1
278
- )
279
- return f"#{'#' * level} {title}".replace("-\n", "").replace(
280
- "\n",
281
- " ",
282
- )
283
-
284
- def format_centered_text(key):
285
- return (
286
- f'<div style="text-align: center;">{block[key]}</div>'.replace(
287
- "-\n",
288
- "",
289
- ).replace("\n", " ")
290
- + "\n"
352
+ if pretty:
353
+ format_text_func = lambda block: format_centered_by_html(
354
+ format_text_plain_func(block)
355
+ )
356
+ format_image_func = lambda block: format_centered_by_html(
357
+ format_image_scaled_by_html_func(
358
+ block,
359
+ original_image_width=original_image_width,
291
360
  )
361
+ )
362
+ else:
363
+ format_text_func = lambda block: block.content
364
+ format_image_func = format_image_plain_func
292
365
 
293
- def format_image(label):
294
- img_tags = []
295
- image_path = "".join(block[label].keys())
296
- img_tags.append(
297
- '<div style="text-align: center;"><img src="{}" alt="Image" /></div>'.format(
298
- image_path.replace("-\n", "").replace("\n", " "),
299
- ),
300
- )
301
- return "\n".join(img_tags)
302
-
303
- def format_first_line(templates, format_func, spliter):
304
- lines = block["block_content"].split(spliter)
305
- for idx in range(len(lines)):
306
- line = lines[idx]
307
- if line.strip() == "":
308
- continue
309
- if line.lower() in templates:
310
- lines[idx] = format_func(line)
311
- break
312
- return spliter.join(lines)
313
-
314
- def format_table():
315
- return "\n" + block["block_content"]
316
-
317
- def get_seg_flag(block, prev_block):
318
-
319
- seg_start_flag = True
320
- seg_end_flag = True
321
-
322
- block_box = block["block_bbox"]
323
- context_left_coordinate = block_box[0]
324
- context_right_coordinate = block_box[2]
325
- seg_start_coordinate = block.get("seg_start_coordinate")
326
- seg_end_coordinate = block.get("seg_end_coordinate")
327
-
328
- if prev_block is not None:
329
- prev_block_bbox = prev_block["block_bbox"]
330
- num_of_prev_lines = prev_block.get("num_of_lines")
331
- pre_block_seg_end_coordinate = prev_block.get("seg_end_coordinate")
332
- prev_end_space_small = (
333
- context_right_coordinate - pre_block_seg_end_coordinate < 10
334
- )
335
- prev_lines_more_than_one = num_of_prev_lines > 1
336
-
337
- overlap_blocks = context_left_coordinate < prev_block_bbox[2]
338
-
339
- # update context_left_coordinate and context_right_coordinate
340
- if overlap_blocks:
341
- context_left_coordinate = min(
342
- prev_block_bbox[0], context_left_coordinate
343
- )
344
- context_right_coordinate = max(
345
- prev_block_bbox[2], context_right_coordinate
346
- )
347
- prev_end_space_small = (
348
- prev_block_bbox[2] - pre_block_seg_end_coordinate < 10
349
- )
350
-
351
- current_start_space_small = (
352
- seg_start_coordinate - context_left_coordinate < 10
353
- )
366
+ if self["model_settings"].get("use_chart_recognition", False):
367
+ format_chart_func = format_chart2table_func
368
+ else:
369
+ format_chart_func = format_image_func
354
370
 
355
- if (
356
- prev_end_space_small
357
- and current_start_space_small
358
- and prev_lines_more_than_one
359
- ):
360
- seg_start_flag = False
361
- else:
362
- if seg_start_coordinate - context_left_coordinate < 10:
363
- seg_start_flag = False
364
-
365
- if context_right_coordinate - seg_end_coordinate < 10:
366
- seg_end_flag = False
367
-
368
- return seg_start_flag, seg_end_flag
369
-
370
- handlers = {
371
- "paragraph_title": lambda: format_title(block["block_content"]),
372
- "doc_title": lambda: f"# {block['block_content']}".replace(
373
- "-\n",
374
- "",
375
- ).replace("\n", " "),
376
- "table_title": lambda: format_centered_text("block_content"),
377
- "figure_title": lambda: format_centered_text("block_content"),
378
- "chart_title": lambda: format_centered_text("block_content"),
379
- "text": lambda: block["block_content"]
380
- .replace("-\n", " ")
381
- .replace("\n", " "),
382
- "abstract": lambda: format_first_line(
383
- ["摘要", "abstract"], lambda l: f"## {l}\n", " "
384
- ),
385
- "content": lambda: block["block_content"]
386
- .replace("-\n", " \n")
387
- .replace("\n", " \n"),
388
- "image": lambda: format_image("block_image"),
389
- "chart": lambda: format_image("block_image"),
390
- "formula": lambda: f"$${block['block_content']}$$",
391
- "table": format_table,
392
- "reference": lambda: format_first_line(
393
- ["参考文献", "references"], lambda l: f"## {l}", "\n"
394
- ),
395
- "algorithm": lambda: block["block_content"].strip("\n"),
396
- "seal": lambda: f"Words of Seals:\n{block['block_content']}",
397
- }
398
- parsing_res_list = obj["parsing_res_list"]
399
- markdown_content = ""
400
- last_label = None
401
- seg_start_flag = None
402
- seg_end_flag = None
403
- prev_block = None
404
- page_first_element_seg_start_flag = None
405
- page_last_element_seg_end_flag = None
406
- parsing_res_list = sorted(
407
- parsing_res_list,
408
- key=lambda x: x.get("sub_index", 999),
371
+ if self["model_settings"].get("use_seal_recognition", False):
372
+ format_seal_func = lambda block: "\n".join(
373
+ [format_image_func(block), format_text_func(block)]
409
374
  )
410
- for block in parsing_res_list:
411
- seg_start_flag, seg_end_flag = get_seg_flag(block, prev_block)
412
-
413
- label = block.get("block_label")
414
- page_first_element_seg_start_flag = (
415
- seg_start_flag
416
- if (page_first_element_seg_start_flag is None)
417
- else page_first_element_seg_start_flag
375
+ else:
376
+ format_seal_func = format_image_func
377
+
378
+ if self["model_settings"].get("use_table_recognition", False):
379
+ if pretty:
380
+ format_table_func = lambda block: "\n" + format_text_func(
381
+ block
382
+ ).replace("<table>", '<table border="1">')
383
+ else:
384
+ format_table_func = lambda block: simplify_table_func(
385
+ "\n" + block.content
418
386
  )
419
- handler = handlers.get(label)
420
- if handler:
421
- prev_block = block
422
- if label == last_label == "text" and seg_start_flag == False:
423
- last_char_of_markdown = (
424
- markdown_content[-1] if markdown_content else ""
425
- )
426
- first_char_of_handler = handler()[0] if handler() else ""
427
- last_is_chinese_char = (
428
- re.match(r"[\u4e00-\u9fff]", last_char_of_markdown)
429
- if last_char_of_markdown
430
- else False
431
- )
432
- first_is_chinese_char = (
433
- re.match(r"[\u4e00-\u9fff]", first_char_of_handler)
434
- if first_char_of_handler
435
- else False
436
- )
437
- if not (last_is_chinese_char or first_is_chinese_char):
438
- markdown_content += " " + handler()
439
- else:
440
- markdown_content += handler()
441
- else:
442
- markdown_content += (
443
- "\n\n" + handler() if markdown_content else handler()
444
- )
445
- last_label = label
446
- page_last_element_seg_end_flag = seg_end_flag
447
-
448
- return markdown_content, (
449
- page_first_element_seg_start_flag,
450
- page_last_element_seg_end_flag,
387
+ else:
388
+ format_table_func = format_image_func
389
+
390
+ if self["model_settings"].get("use_formula_recognition", False):
391
+ format_formula_func = lambda block: f"$${block.content}$$"
392
+ else:
393
+ format_formula_func = format_image_func
394
+
395
+ handle_funcs_dict = {
396
+ "paragraph_title": format_title_func,
397
+ "abstract_title": format_title_func,
398
+ "reference_title": format_title_func,
399
+ "content_title": format_title_func,
400
+ "doc_title": lambda block: f"# {block.content}".replace(
401
+ "-\n",
402
+ "",
403
+ ).replace("\n", " "),
404
+ "table_title": format_text_func,
405
+ "figure_title": format_text_func,
406
+ "chart_title": format_text_func,
407
+ "vision_footnote": lambda block: block.content.replace(
408
+ "\n\n", "\n"
409
+ ).replace("\n", "\n\n"),
410
+ "text": lambda block: block.content.replace("\n\n", "\n").replace(
411
+ "\n", "\n\n"
412
+ ),
413
+ "abstract": partial(
414
+ format_first_line_func,
415
+ templates=["摘要", "abstract"],
416
+ format_func=lambda l: f"## {l}\n",
417
+ spliter=" ",
418
+ ),
419
+ "content": lambda block: block.content.replace("-\n", " \n").replace(
420
+ "\n", " \n"
421
+ ),
422
+ "image": format_image_func,
423
+ "chart": format_chart_func,
424
+ "formula": format_formula_func,
425
+ "table": format_table_func,
426
+ "reference": partial(
427
+ format_first_line_func,
428
+ templates=["参考文献", "references"],
429
+ format_func=lambda l: f"## {l}",
430
+ spliter="\n",
431
+ ),
432
+ "algorithm": lambda block: block.content.strip("\n"),
433
+ "seal": format_seal_func,
434
+ }
435
+
436
+ markdown_content = ""
437
+ last_label = None
438
+ seg_start_flag = None
439
+ seg_end_flag = None
440
+ prev_block = None
441
+ page_first_element_seg_start_flag = None
442
+ page_last_element_seg_end_flag = None
443
+ markdown_info = {}
444
+ markdown_info["markdown_images"] = {}
445
+ for block in self["parsing_res_list"]:
446
+ seg_start_flag, seg_end_flag = get_seg_flag(block, prev_block)
447
+
448
+ label = block.label
449
+ if block.image is not None:
450
+ markdown_info["markdown_images"][block.image["path"]] = block.image[
451
+ "img"
452
+ ]
453
+ page_first_element_seg_start_flag = (
454
+ seg_start_flag
455
+ if (page_first_element_seg_start_flag is None)
456
+ else page_first_element_seg_start_flag
451
457
  )
452
458
 
453
- markdown_info = dict()
454
- markdown_info["markdown_texts"], (
455
- page_first_element_seg_start_flag,
456
- page_last_element_seg_end_flag,
457
- ) = _format_data(self)
459
+ handle_func = handle_funcs_dict.get(label, None)
460
+ if handle_func:
461
+ prev_block = block
462
+ if label == last_label == "text" and seg_start_flag == False:
463
+ markdown_content += handle_func(block)
464
+ else:
465
+ markdown_content += (
466
+ "\n\n" + handle_func(block)
467
+ if markdown_content
468
+ else handle_func(block)
469
+ )
470
+ last_label = label
471
+ page_last_element_seg_end_flag = seg_end_flag
472
+
473
+ markdown_info["markdown_texts"] = markdown_content
458
474
  markdown_info["page_continuation_flags"] = (
459
475
  page_first_element_seg_start_flag,
460
476
  page_last_element_seg_end_flag,
461
477
  )
462
-
463
- markdown_info["markdown_images"] = {}
464
478
  for img in self["imgs_in_doc"]:
465
479
  markdown_info["markdown_images"][img["path"]] = img["img"]
466
480