paddlex 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240)
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +1 -1
  3. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  4. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  5. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  6. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  7. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  8. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  9. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
  10. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
  11. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
  12. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  13. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  14. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  15. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  16. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  17. paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
  18. paddlex/configs/pipelines/OCR.yaml +7 -6
  19. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
  20. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
  21. paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
  22. paddlex/configs/pipelines/doc_understanding.yaml +1 -1
  23. paddlex/configs/pipelines/formula_recognition.yaml +2 -2
  24. paddlex/configs/pipelines/layout_parsing.yaml +3 -2
  25. paddlex/configs/pipelines/seal_recognition.yaml +1 -0
  26. paddlex/configs/pipelines/table_recognition.yaml +2 -1
  27. paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
  28. paddlex/hpip_links.html +20 -20
  29. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
  30. paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
  31. paddlex/inference/common/result/mixin.py +19 -12
  32. paddlex/inference/models/base/predictor/base_predictor.py +2 -8
  33. paddlex/inference/models/common/static_infer.py +29 -73
  34. paddlex/inference/models/common/tokenizer/__init__.py +2 -0
  35. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
  36. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
  37. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  38. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
  39. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  40. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
  41. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
  42. paddlex/inference/models/common/tokenizer/vocab.py +7 -7
  43. paddlex/inference/models/common/ts/funcs.py +19 -8
  44. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  45. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  46. paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
  47. paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
  48. paddlex/inference/models/common/vlm/generation/utils.py +1 -1
  49. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
  50. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
  51. paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
  52. paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
  53. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  54. paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
  55. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  56. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  57. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
  58. paddlex/inference/models/doc_vlm/predictor.py +79 -24
  59. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  60. paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
  61. paddlex/inference/models/doc_vlm/processors/common.py +189 -0
  62. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  63. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
  64. paddlex/inference/models/formula_recognition/predictor.py +8 -2
  65. paddlex/inference/models/formula_recognition/processors.py +90 -77
  66. paddlex/inference/models/formula_recognition/result.py +28 -27
  67. paddlex/inference/models/image_feature/processors.py +3 -4
  68. paddlex/inference/models/keypoint_detection/predictor.py +3 -0
  69. paddlex/inference/models/object_detection/predictor.py +2 -0
  70. paddlex/inference/models/object_detection/processors.py +28 -3
  71. paddlex/inference/models/object_detection/utils.py +2 -0
  72. paddlex/inference/models/table_structure_recognition/result.py +0 -10
  73. paddlex/inference/models/text_detection/predictor.py +8 -0
  74. paddlex/inference/models/text_detection/processors.py +44 -10
  75. paddlex/inference/models/text_detection/result.py +0 -10
  76. paddlex/inference/models/text_recognition/result.py +1 -1
  77. paddlex/inference/pipelines/__init__.py +9 -5
  78. paddlex/inference/pipelines/_parallel.py +172 -0
  79. paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
  80. paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
  81. paddlex/inference/pipelines/base.py +14 -4
  82. paddlex/inference/pipelines/components/faisser.py +1 -1
  83. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
  84. paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
  85. paddlex/inference/pipelines/formula_recognition/result.py +1 -11
  86. paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
  87. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
  88. paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
  89. paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
  90. paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
  91. paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
  92. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +832 -260
  93. paddlex/inference/pipelines/layout_parsing/result.py +4 -17
  94. paddlex/inference/pipelines/layout_parsing/result_v2.py +259 -245
  95. paddlex/inference/pipelines/layout_parsing/setting.py +88 -0
  96. paddlex/inference/pipelines/layout_parsing/utils.py +391 -2028
  97. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  98. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1199 -0
  99. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +615 -0
  100. paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
  101. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
  102. paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
  103. paddlex/inference/pipelines/ocr/pipeline.py +127 -70
  104. paddlex/inference/pipelines/ocr/result.py +21 -18
  105. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
  106. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
  107. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
  108. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
  109. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +6 -6
  110. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
  111. paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
  112. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
  113. paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
  114. paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
  115. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
  116. paddlex/inference/pipelines/table_recognition/result.py +1 -1
  117. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
  118. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
  119. paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
  120. paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
  121. paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
  122. paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
  123. paddlex/inference/serving/basic_serving/_app.py +46 -13
  124. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
  125. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
  126. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
  127. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
  128. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
  129. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
  130. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
  131. paddlex/inference/serving/infra/utils.py +20 -22
  132. paddlex/inference/serving/schemas/formula_recognition.py +1 -1
  133. paddlex/inference/serving/schemas/layout_parsing.py +1 -2
  134. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
  135. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
  136. paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
  137. paddlex/inference/serving/schemas/seal_recognition.py +1 -1
  138. paddlex/inference/serving/schemas/table_recognition.py +2 -6
  139. paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
  140. paddlex/inference/utils/hpi.py +30 -16
  141. paddlex/inference/utils/hpi_model_info_collection.json +666 -162
  142. paddlex/inference/utils/io/readers.py +12 -12
  143. paddlex/inference/utils/misc.py +20 -0
  144. paddlex/inference/utils/mkldnn_blocklist.py +59 -0
  145. paddlex/inference/utils/official_models.py +140 -5
  146. paddlex/inference/utils/pp_option.py +74 -9
  147. paddlex/model.py +2 -2
  148. paddlex/modules/__init__.py +1 -1
  149. paddlex/modules/anomaly_detection/evaluator.py +2 -2
  150. paddlex/modules/base/__init__.py +1 -1
  151. paddlex/modules/base/evaluator.py +5 -5
  152. paddlex/modules/base/trainer.py +1 -1
  153. paddlex/modules/doc_vlm/dataset_checker.py +2 -2
  154. paddlex/modules/doc_vlm/evaluator.py +2 -2
  155. paddlex/modules/doc_vlm/exportor.py +2 -2
  156. paddlex/modules/doc_vlm/model_list.py +1 -1
  157. paddlex/modules/doc_vlm/trainer.py +2 -2
  158. paddlex/modules/face_recognition/evaluator.py +2 -2
  159. paddlex/modules/formula_recognition/evaluator.py +5 -2
  160. paddlex/modules/formula_recognition/model_list.py +3 -0
  161. paddlex/modules/formula_recognition/trainer.py +3 -0
  162. paddlex/modules/general_recognition/evaluator.py +1 -1
  163. paddlex/modules/image_classification/evaluator.py +2 -2
  164. paddlex/modules/image_classification/model_list.py +1 -0
  165. paddlex/modules/instance_segmentation/evaluator.py +1 -1
  166. paddlex/modules/keypoint_detection/evaluator.py +1 -1
  167. paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
  168. paddlex/modules/multilabel_classification/evaluator.py +2 -2
  169. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
  170. paddlex/modules/object_detection/evaluator.py +2 -2
  171. paddlex/modules/object_detection/model_list.py +2 -0
  172. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
  173. paddlex/modules/semantic_segmentation/evaluator.py +2 -2
  174. paddlex/modules/table_recognition/evaluator.py +2 -2
  175. paddlex/modules/text_detection/evaluator.py +2 -2
  176. paddlex/modules/text_detection/model_list.py +2 -0
  177. paddlex/modules/text_recognition/evaluator.py +2 -2
  178. paddlex/modules/text_recognition/model_list.py +2 -0
  179. paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
  180. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  181. paddlex/modules/ts_classification/evaluator.py +2 -2
  182. paddlex/modules/ts_forecast/evaluator.py +2 -2
  183. paddlex/modules/video_classification/evaluator.py +2 -2
  184. paddlex/modules/video_detection/evaluator.py +2 -2
  185. paddlex/ops/__init__.py +8 -5
  186. paddlex/paddlex_cli.py +19 -13
  187. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
  188. paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
  189. paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
  190. paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
  191. paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
  192. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
  193. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
  194. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
  195. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
  196. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
  197. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
  198. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
  199. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
  200. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
  201. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
  202. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
  203. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
  204. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
  205. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
  206. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
  207. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
  208. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
  209. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
  210. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
  211. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
  212. paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
  213. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
  214. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
  215. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
  216. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
  217. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
  218. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
  219. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
  220. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
  221. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
  222. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
  223. paddlex/repo_apis/base/config.py +1 -1
  224. paddlex/repo_manager/core.py +3 -3
  225. paddlex/repo_manager/meta.py +6 -2
  226. paddlex/repo_manager/repo.py +17 -16
  227. paddlex/utils/custom_device_list.py +26 -2
  228. paddlex/utils/deps.py +3 -3
  229. paddlex/utils/device.py +5 -13
  230. paddlex/utils/env.py +4 -0
  231. paddlex/utils/flags.py +11 -4
  232. paddlex/utils/fonts/__init__.py +34 -4
  233. paddlex/utils/misc.py +1 -1
  234. paddlex/utils/subclass_register.py +2 -2
  235. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/METADATA +349 -208
  236. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/RECORD +240 -211
  237. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/WHEEL +1 -1
  238. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/entry_points.txt +1 -0
  239. {paddlex-3.0.0rc1.dist-info/licenses → paddlex-3.0.2.dist-info}/LICENSE +0 -0
  240. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,859 @@
1
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from typing import Any, List, Union
15
+
16
+ import numpy as np
17
+
18
+ from .setting import BLOCK_LABEL_MAP, LINE_SETTINGS
19
+ from .utils import (
20
+ caculate_euclidean_dist,
21
+ calculate_projection_overlap_ratio,
22
+ is_english_letter,
23
+ is_non_breaking_punctuation,
24
+ is_numeric,
25
+ )
26
+
27
+ __all__ = [
28
+ "TextSpan",
29
+ "TextLine",
30
+ "LayoutBlock",
31
+ "LayoutRegion",
32
+ ]
33
+
34
+
35
class TextSpan:
    """A recognised piece of content (text or formula) with its bounding box."""

    def __init__(self, box, text, label):
        """
        Build a span from its geometry, content and category.

        Args:
            box (list): Bounding box of the span; sibling code indexes it as
                ``[x_min, y_min, x_max, y_max]``.
            text (str): Recognised content of the span.
            label (str): Category of the span. Sibling code in this file
                compares it against ``"text"`` and ``"formula"``.
        """
        self.box, self.text, self.label = box, text, label

    def __str__(self) -> str:
        # Only the content is shown; box and label are omitted on purpose.
        return f"{self.text}"

    def __repr__(self) -> str:
        # Same rendering as ``__str__``.
        return f"{self.text}"
56
+
57
+
58
class TextLine(object):
    """A line of text made of ``TextSpan`` objects.

    Attributes:
        spans (list): The spans that make up the line.
        direction (str): ``"horizontal"`` or any other value (treated as
            vertical by the index selection below).
        region_box (list | None): Union box ``[x_min, y_min, x_max, y_max]``
            of all spans, or ``None`` when the line has no spans.
        need_new_line (bool): Set to ``True`` by ``format_line`` when the line
            should be followed by a line break.
    """

    def __init__(
        self,
        spans: "Union[List[TextSpan], None]" = None,
        direction="horizontal",
    ):
        """
        Initialize a TextLine object.

        Args:
            spans (List[TextSpan], optional): Spans of the line. When omitted,
                a new empty list is created for this instance.
            direction (str): The direction of the text line. Defaults to "horizontal".
        """
        # A literal ``[]`` default would be created once and shared by every
        # TextLine, so ``add_span`` on one line would leak spans into others.
        self.spans = [] if spans is None else spans
        self.direction = direction
        self.region_box = self.get_region_box()
        self.need_new_line = False

    @property
    def labels(self):
        """Labels of all spans, in span order."""
        return [span.label for span in self.spans]

    @property
    def boxes(self):
        """Bounding boxes of all spans, in span order."""
        return [span.box for span in self.spans]

    @property
    def height(self):
        """Extent of ``region_box`` across the reading direction."""
        start_idx = 1 if self.direction == "horizontal" else 0
        end_idx = 3 if self.direction == "horizontal" else 2
        return abs(self.region_box[end_idx] - self.region_box[start_idx])

    @property
    def width(self):
        """Extent of ``region_box`` along the reading direction."""
        start_idx = 0 if self.direction == "horizontal" else 1
        end_idx = 2 if self.direction == "horizontal" else 3
        return abs(self.region_box[end_idx] - self.region_box[start_idx])

    def __str__(self) -> str:
        return f"{' '.join([str(span.text) for span in self.spans])}\n"

    def __repr__(self) -> str:
        return f"{' '.join([str(span.text) for span in self.spans])}\n"

    def add_span(self, span: "Union[TextSpan, List[TextSpan]]"):
        """
        Add one span or a list of spans to the line and refresh ``region_box``.

        Args:
            span (Union[TextSpan, List[TextSpan]]): A single TextSpan object or a list of TextSpan objects.
        """
        if isinstance(span, list):
            self.spans.extend(span)
        else:
            self.spans.append(span)
        self.region_box = self.get_region_box()

    def get_region_box(self):
        """
        Compute the union bounding box of all spans.

        Returns:
            list | None: ``[x_min, y_min, x_max, y_max]``, or ``None`` when the
            line has no spans.
        """
        if not self.spans:
            return None

        return [
            min(span.box[0] for span in self.spans),
            min(span.box[1] for span in self.spans),
            max(span.box[2] for span in self.spans),
            max(span.box[3] for span in self.spans),
        ]

    def get_texts(
        self,
        block_label: str,
        block_text_width: int,
        block_start_coordinate: int,
        block_stop_coordinate: int,
        ori_image,
        text_rec_model=None,
        text_rec_score_thresh=None,
    ):
        """
        Sort the spans, re-recognise split text spans if needed, and format the line.

        Args:
            block_label (str): The label of the block.
            block_text_width (int): The width of the block.
            block_start_coordinate (int): The starting coordinate of the block.
            block_stop_coordinate (int): The stopping coordinate of the block.
            ori_image (np.ndarray): The original image; cropped as ``[y1:y2, x1:x2]``.
            text_rec_model (Any): Callable invoked with a list holding one crop;
                ``next()`` of its result must expose ``"rec_score"`` and ``"rec_text"``.
                Only used when a formula span forces text spans to be split.
            text_rec_score_thresh (float): Re-recognised text spans scoring below
                this value are dropped.

        Returns:
            str: The text of the text line.
        """
        span_box_start_index = 0 if self.direction == "horizontal" else 1
        lines_start_index = 1 if self.direction == "horizontal" else 3
        self.spans.sort(
            key=lambda span: (
                span.box[span_box_start_index] // 2,
                (
                    span.box[lines_start_index]
                    if self.direction == "horizontal"
                    else -span.box[lines_start_index]
                ),
            )
        )
        if "formula" in self.labels:
            sort_index = 0 if self.direction == "horizontal" else 1
            splited_spans = self.split_boxes_by_projection()
            # A different span count means at least one span was cut around
            # another one, so the cut text pieces must be recognised again.
            if len(self.spans) != len(splited_spans):
                splited_spans.sort(key=lambda span: span.box[sort_index])
                new_spans = []
                for span in splited_spans:
                    bbox = span.box
                    if span.label == "text":
                        crop_img = ori_image[
                            int(bbox[1]) : int(bbox[3]),
                            int(bbox[0]) : int(bbox[2]),
                        ]
                        crop_img_rec_res = next(text_rec_model([crop_img]))
                        crop_img_rec_score = crop_img_rec_res["rec_score"]
                        crop_img_rec_text = crop_img_rec_res["rec_text"]
                        span.text = crop_img_rec_text
                        if crop_img_rec_score < text_rec_score_thresh:
                            continue
                    new_spans.append(span)
                self.spans = new_spans
        line_text = self.format_line(
            block_text_width,
            block_start_coordinate,
            block_stop_coordinate,
            line_gap_limit=self.height * 1.5,
            block_label=block_label,
        )
        return line_text

    def is_projection_contained(self, box_a, box_b, start_idx, end_idx):
        """Check if box_a fully covers box_b along the axis given by start_idx/end_idx."""
        return box_a[start_idx] <= box_b[start_idx] and box_a[end_idx] >= box_b[end_idx]

    def split_boxes_by_projection(self, offset=1e-5):
        """
        Split spans whose projection on the reading axis fully covers a later span.

        Args:
            offset (float): Small gap kept between a split piece and the span it was cut around.
        Returns:
            list: New list of spans, including split pieces, which keep the
            ``text`` and ``label`` of the span they were cut from.
        """

        new_spans = []
        if self.direction == "horizontal":
            projection_start_index, projection_end_index = 0, 2
        else:
            projection_start_index, projection_end_index = 1, 3

        for i in range(len(self.spans)):
            span = self.spans[i]
            is_split = False
            # NOTE(review): j starts at i, so every span is first compared with
            # itself, which always counts as contained; ``is_split`` therefore
            # looks always True and the final ``if not is_split`` unreachable.
            for j in range(i, len(self.spans)):
                box_b = self.spans[j].box
                # Re-read each round: ``span`` may have been replaced below.
                box_a, text, label = span.box, span.text, span.label
                if self.is_projection_contained(
                    box_a, box_b, projection_start_index, projection_end_index
                ):
                    is_split = True
                    # Piece of box_a lying before box_b.
                    if box_a[projection_start_index] < box_b[projection_start_index]:
                        w = (
                            box_b[projection_start_index]
                            - offset
                            - box_a[projection_start_index]
                        )
                        if w > 1:
                            new_bbox = box_a.copy()
                            new_bbox[projection_end_index] = (
                                box_b[projection_start_index] - offset
                            )
                            new_spans.append(
                                TextSpan(
                                    box=np.array(new_bbox),
                                    text=text,
                                    label=label,
                                )
                            )
                    # Piece of box_a lying after box_b becomes the current span.
                    if box_a[projection_end_index] > box_b[projection_end_index]:
                        w = (
                            box_a[projection_end_index]
                            - box_b[projection_end_index]
                            + offset
                        )
                        if w > 1:
                            # This writes into the current span's box in place.
                            box_a[projection_start_index] = (
                                box_b[projection_end_index] + offset
                            )
                            span = TextSpan(
                                box=np.array(box_a),
                                text=text,
                                label=label,
                            )
                if j == len(self.spans) - 1 and is_split:
                    new_spans.append(span)
            if not is_split:
                new_spans.append(span)

        return new_spans

    def format_line(
        self,
        block_text_width: int,
        block_start_coordinate: int,
        block_stop_coordinate: int,
        line_gap_limit: int = 10,
        block_label: str = "text",
    ) -> str:
        """
        Join the spans into one string and decide whether a line break follows.

        Side effects: formula spans may get their ``text`` wrapped in ``$``,
        and ``need_new_line`` may be set to ``True``.

        Args:
            block_text_width (int): The width of the block.
            block_start_coordinate (int): The starting coordinate of the block.
            block_stop_coordinate (int): The stopping coordinate of the block.
            line_gap_limit (int): Gap between the last span and the block end above which the line counts as ended. Default is 10.
            block_label (str): The label associated with the entire block. Default is 'text'.
        Returns:
            str: Formatted line of text; ``""`` when the line has no spans or no text.
        """
        # Nothing to format; avoids indexing into an empty span list below.
        if not self.spans:
            return ""

        first_span_box = self.spans[0].box
        last_span_box = self.spans[-1].box

        line_text = ""
        for span in self.spans:
            if span.label == "formula" and block_label != "formula":
                formula_rec = span.text
                if not formula_rec.startswith("$") and not formula_rec.endswith("$"):
                    if len(self.spans) > 1:
                        span.text = f"${span.text}$"
                    else:
                        # A formula alone on its line starts on a new line.
                        span.text = f"\n${span.text}$"
            line_text += span.text
            # Parentheses make the original and/or precedence explicit.
            if (
                len(span.text) > 0 and is_english_letter(line_text[-1])
            ) or span.label == "formula":
                line_text += " "

        if self.direction == "horizontal":
            text_stop_index = 2
        else:
            text_stop_index = 3

        if line_text.endswith(" "):
            line_text = line_text[:-1]

        if len(line_text) == 0:
            return ""

        last_char = line_text[-1]
        # Free space between the end of the last span and the end of the block.
        trailing_gap = block_stop_coordinate - last_span_box[text_stop_index]

        if (
            not is_english_letter(last_char)
            and not is_non_breaking_punctuation(last_char)
            and not is_numeric(last_char)
        ) or trailing_gap > block_text_width * 0.3:
            if (
                self.direction == "horizontal" and trailing_gap > line_gap_limit
            ) or (
                self.direction == "vertical"
                and (
                    trailing_gap > line_gap_limit
                    or first_span_box[1] - block_start_coordinate > line_gap_limit
                )
            ):
                self.need_new_line = True

        # A trailing hyphen is dropped and the line is returned as is.
        if line_text.endswith("-"):
            line_text = line_text[:-1]
            return line_text

        # line_text is guaranteed non-empty from here on.
        if is_english_letter(last_char) or line_text.endswith("$"):
            line_text += " "
        if (
            not is_english_letter(last_char) and not is_numeric(last_char)
        ) or self.direction == "vertical":
            if (
                trailing_gap > block_text_width * 0.3
                and not is_non_breaking_punctuation(last_char)
            ):
                line_text += "\n"
                self.need_new_line = True
        elif trailing_gap > (block_stop_coordinate - block_start_coordinate) * 0.5:
            line_text += "\n"
            self.need_new_line = True

        return line_text
377
+
378
+
379
+ class LayoutBlock(object):
380
+ """Layout Block Class"""
381
+
382
def __init__(self, label, bbox, content="") -> None:
    """
    Set up a layout block from its label, bounding box and content.

    Args:
        label (str): Label assigned to the block.
        bbox (list): Bounding box ``[x1, y1, x2, y2]``. It is stored with
            every value cast to ``int``; ``width`` and ``height`` are
            computed from the values as passed in.
        content (str, optional): Content of the block. Defaults to an empty string.
    """
    self.label = label
    self.order_label = None
    self.bbox = [int(value) for value in bbox]
    self.content = content
    # Sentinels: any real coordinate is smaller than +inf / larger than -inf.
    self.seg_start_coordinate = float("inf")
    self.seg_end_coordinate = float("-inf")
    x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
    self.width = x2 - x1
    self.height = y2 - y1
    self.area = float(self.width) * float(self.height)
    self.num_of_lines = 1
    self.image = None
    self.index = None
    self.order_index = None
    self.text_line_width = 1
    self.text_line_height = 1
    self.child_blocks = []
    # Derives ``direction`` and the direction-dependent attributes.
    self.update_direction()
408
+
409
def __str__(self) -> str:
    """Return a multi-line dump of index, label, order label, bbox and content."""
    return (
        "\n\n#################\n"
        f"index:\t{self.index}\n"
        f"label:\t{self.label}\n"
        f"region_label:\t{self.order_label}\n"
        f"bbox:\t{self.bbox}\n"
        f"content:\t{self.content}\n"
        "#################"
    )
412
+
413
def __repr__(self) -> str:
    """Return the same multi-line dump that ``str()`` produces for the block."""
    return (
        "\n\n#################\n"
        f"index:\t{self.index}\n"
        f"label:\t{self.label}\n"
        f"region_label:\t{self.order_label}\n"
        f"bbox:\t{self.bbox}\n"
        f"content:\t{self.content}\n"
        "#################"
    )
416
+
417
def to_dict(self) -> dict:
    """Return the instance attribute dictionary itself (not a copy)."""
    return vars(self)
419
+
420
def update_direction(self, direction=None) -> None:
    """
    Set the block direction and refresh the attributes derived from it.

    Args:
        direction (str, optional): Direction to apply. Any falsy value makes
            the method fall back to ``get_bbox_direction()``. Defaults to None.
    """
    self.direction = direction or self.get_bbox_direction()
    self.update_direction_info()
431
+
432
def update_direction_info(self) -> None:
    """Recompute the side lengths and start/end coordinates that depend on ``direction``."""
    is_horizontal = self.direction == "horizontal"
    # Indices into ``bbox`` along the main axis and along the secondary axis.
    main_lo, main_hi = (0, 2) if is_horizontal else (1, 3)
    cross_lo, cross_hi = (1, 3) if is_horizontal else (0, 2)

    self.secondary_direction = "vertical" if is_horizontal else "horizontal"
    self.short_side_length = self.height if is_horizontal else self.width
    self.long_side_length = self.width if is_horizontal else self.height
    self.start_coordinate = self.bbox[main_lo]
    self.end_coordinate = self.bbox[main_hi]
    self.secondary_direction_start_coordinate = self.bbox[cross_lo]
    self.secondary_direction_end_coordinate = self.bbox[cross_hi]
450
+
451
def append_child_block(self, child_block) -> None:
    """
    Attach a child block and grow this block's bbox to cover it.

    Args:
        child_block (LayoutBlock): Child block to be added. Its own children,
            if any, are detached from it and attached to this block as well.
    Returns:
        None
    """
    # Remember the bbox as it was before the first child was merged in.
    if not self.child_blocks:
        self.ori_bbox = self.bbox.copy()

    left, top, right, bottom = self.bbox
    c_left, c_top, c_right, c_bottom = child_block.bbox
    # The merged bbox is stored as a tuple, exactly as before.
    self.bbox = (
        min(left, c_left),
        min(top, c_top),
        max(right, c_right),
        max(bottom, c_bottom),
    )
    self.update_direction_info()

    descendants = child_block.get_child_blocks() if child_block.child_blocks else []
    self.child_blocks.extend([child_block, *descendants])
476
+
477
def get_child_blocks(self) -> list:
    """
    Detach and return all child blocks, restoring the pre-merge bbox.

    Returns:
        list: A copy of the child list as it was before the call; the
        block's own ``child_blocks`` is reset to an empty list.
    """
    # ``ori_bbox`` is only created by ``append_child_block``. Without it no
    # child was ever merged, so the current bbox is already the original one.
    self.bbox = getattr(self, "ori_bbox", self.bbox)
    child_blocks = self.child_blocks.copy()
    self.child_blocks = []
    return child_blocks
483
+
484
def get_centroid(self) -> tuple:
    """Return the ``(x, y)`` midpoint of the block's bounding box."""
    left, top, right, bottom = self.bbox
    return ((left + right) / 2, (top + bottom) / 2)
489
+
490
def get_bbox_direction(self, direction_ratio: float = 1.0) -> str:
    """
    Classify the block's bounding box as horizontal or vertical.

    Args:
        direction_ratio (float): Factor applied to the width before it is
            compared with the height. Default is 1.0.

    Returns:
        str: "horizontal" when ``width * direction_ratio >= height``,
            otherwise "vertical".
    """
    if self.width * direction_ratio >= self.height:
        return "horizontal"
    return "vertical"
503
+
504
def calculate_text_line_direction(
    self, bboxes: List[List[int]], direction_ratio: float = 1.5
) -> str:
    """
    Calculate the direction of the text based on the bounding boxes.

    A box counts as horizontal when ``width * direction_ratio >= height``.
    The overall result is "horizontal" when at least half of the boxes are
    horizontal; an empty ``bboxes`` therefore yields "horizontal".

    Args:
        bboxes (list): A list of bounding boxes, each ``[x1, y1, x2, y2]``.
        direction_ratio (float): Ratio for determining direction. Default is 1.5.

    Returns:
        str: "horizontal" or "vertical".

    Raises:
        ValueError: If any bounding box does not have exactly 4 entries.
    """
    horizontal_box_num = 0
    for bbox in bboxes:
        if len(bbox) != 4:
            raise ValueError(
                "Invalid bounding box format. Expected a list of length 4."
            )
        x1, y1, x2, y2 = bbox
        if (x2 - x1) * direction_ratio >= (y2 - y1):
            horizontal_box_num += 1

    return "horizontal" if horizontal_box_num >= len(bboxes) * 0.5 else "vertical"
530
+
531
def group_boxes_into_lines(
    self, ocr_rec_res, line_height_iou_threshold
) -> List[TextLine]:
    """
    Group OCR spans into text lines along the block's reading direction.

    Side effects: the block direction is re-derived from the boxes labelled
    "text" and applied through ``self.update_direction``; when at least one
    span exists, ``self.text_line_height`` / ``self.text_line_width`` are
    set to the mean height / width of the returned lines (0 when every line
    was filtered out).

    Args:
        ocr_rec_res (dict): The result of OCR recognition; the keys "boxes",
            "rec_texts" and "rec_labels" are read.
        line_height_iou_threshold (float): The minimum projection overlap
            ratio required for a span to join the current line.

    Returns:
        list: A list of TextLines (empty when there are no spans).
    """
    boxes = ocr_rec_res["boxes"]
    texts = ocr_rec_res["rec_texts"]
    labels = ocr_rec_res["rec_labels"]

    # Only boxes labelled "text" vote on the reading direction.
    text_only_boxes = [box for i, box in enumerate(boxes) if labels[i] == "text"]
    self.update_direction(self.calculate_text_line_direction(text_only_boxes))

    spans = [TextSpan(*fields) for fields in zip(boxes, texts, labels)]
    if not spans:
        return []

    # Vertical text: order spans by x, descending, and match them on their
    # horizontal projection. Horizontal text: order by y, ascending, and
    # match on the vertical projection.
    if self.direction == "vertical":
        sort_axis, descending, match_direction = 0, True, "horizontal"
    else:
        sort_axis, descending, match_direction = 1, False, "vertical"
    spans.sort(key=lambda item: item.box[sort_axis], reverse=descending)

    head, *tail = spans
    lines = [TextLine([head], direction=self.direction)]
    for span in tail:
        ratio = calculate_projection_overlap_ratio(
            lines[-1].region_box, span.box, match_direction, mode="small"
        )
        if ratio >= line_height_iou_threshold:
            lines[-1].add_span(span)
        else:
            lines.append(TextLine([span], direction=self.direction))

    if lines and self.direction == "vertical":
        heights = np.array([entry.height for entry in lines])
        shortest = np.min(heights)
        tallest = np.max(heights)

        # Heights differ by more than 2x: consider dropping the tall lines.
        if tallest > shortest * 2:
            height_limit = shortest * 1.1
            below_limit = np.sum(heights < height_limit)

            # When fewer than 40% of the lines are below the limit, keep
            # only the lines whose height does not exceed it.
            if below_limit < len(lines) * 0.4:
                keep_mask = heights <= height_limit
                lines = [entry for entry, keep in zip(lines, keep_mask) if keep]

    # Record the average line size of the surviving lines.
    if lines:
        self.text_line_height = np.mean([entry.height for entry in lines])
        self.text_line_width = np.mean([entry.width for entry in lines])
    else:
        self.text_line_height = 0
        self.text_line_width = 0

    return lines
613
+
614
def update_text_content(
    self,
    image: list,
    ocr_rec_res: dict,
    text_rec_model: Any,
    text_rec_score_thresh: Union[float, None] = None,
) -> None:
    """
    Rebuild ``self.content`` from the OCR result of this block.

    The spans are grouped into lines, each line is turned into text via
    ``TextLine.get_texts``, and the line texts are then joined. When the
    label has a non-empty delimiter in ``LINE_SETTINGS["delimiter_map"]``
    the lines are joined with it; otherwise they are concatenated and line
    breaks are inserted heuristically. ``self.num_of_lines`` and the segment
    start/end coordinates are updated as well.

    Args:
        image (list): The input image, forwarded to ``get_texts`` as ``ori_image``.
        ocr_rec_res (dict): The result of OCR recognition.
        text_rec_model (Any): The model used for text recognition.
        text_rec_score_thresh (Union[float, None]): The score threshold for
            text recognition, forwarded unchanged to ``get_texts``.

    Returns:
        None
    """
    if len(ocr_rec_res["rec_texts"]) == 0:
        self.content = ""
        return

    iou_threshold = LINE_SETTINGS.get("line_height_iou_threshold", 0.8)
    lines = self.group_boxes_into_lines(ocr_rec_res, iou_threshold)

    # bbox indices of the start / stop coordinate along the reading direction
    start_axis = 0 if self.direction == "horizontal" else 1
    stop_axis = start_axis + 2

    if self.label == "reference":
        # "reference" blocks take their extent from the OCR boxes, not the bbox.
        ocr_boxes = ocr_rec_res["boxes"]
        block_start = min(box[start_axis] for box in ocr_boxes)
        block_stop = max(box[stop_axis] for box in ocr_boxes)
    else:
        block_start = self.bbox[start_axis]
        block_stop = self.bbox[stop_axis]

    text_lines = []
    seen_widths = []
    need_new_line_num = 0
    final_idx = len(lines) - 1

    for line_idx, line in enumerate(lines):
        seen_widths.append(line.width)
        # block_text_width is the widest line seen so far, this one included
        line_text = line.get_texts(
            block_label=self.label,
            block_text_width=max(seen_widths),
            block_start_coordinate=block_start,
            block_stop_coordinate=block_stop,
            ori_image=image,
            text_rec_model=text_rec_model,
            text_rec_score_thresh=text_rec_score_thresh,
        )

        if line.need_new_line:
            need_new_line_num += 1

        # set segment start and end coordinate
        # NOTE(review): because of the elif, a block with a single line never
        # gets seg_end_coordinate assigned here - confirm this is intended.
        if line_idx == 0:
            self.seg_start_coordinate = line.spans[0].box[0]
        elif line_idx == final_idx:
            self.seg_end_coordinate = line.spans[-1].box[2]

        text_lines.append(line_text)

    delim = LINE_SETTINGS["delimiter_map"].get(self.label, "")

    if delim != "":
        content = delim.join(text_lines)
    else:
        block_extent = block_stop - block_start
        majority_want_break = need_new_line_num > len(text_lines) * 0.5
        most_want_break = need_new_line_num > len(text_lines) * 0.6

        content = ""
        # NOTE(review): pre_line_end is set once and never cleared afterwards.
        pre_line_end = False
        last_char = ""
        for line, line_text in zip(lines, text_lines):
            if len(line_text) == 0:
                continue

            if pre_line_end:
                # An earlier line stopped short of the block end: a clearly
                # indented start here is treated as a new paragraph.
                start_gap = line.region_box[start_axis] - block_start
                indented_start = (
                    start_gap > line.height * 1.5
                    and not is_english_letter(last_char)
                    and not is_numeric(last_char)
                )
                if (
                    indented_start or start_gap > block_extent * 0.4
                ) and not content.endswith("\n"):
                    line_text = "\n" + line_text
            content += line_text

            # Look past a single trailing space when picking the last character.
            if len(line_text) > 2 and line_text.endswith(" "):
                last_char = line_text[-2]
            else:
                last_char = line_text[-1]

            ends_sentence_like = (
                not line_text.endswith("\n")
                and not is_english_letter(last_char)
                and not is_non_breaking_punctuation(last_char)
                and not is_numeric(last_char)
            )
            if (ends_sentence_like and majority_want_break) or most_want_break:
                content += "\n"

            if block_stop - line.region_box[stop_axis] > block_extent * 0.3:
                pre_line_end = True

    self.content = content
    self.num_of_lines = len(text_lines)
731
+
732
+
733
+ class LayoutRegion(LayoutBlock):
734
+ """LayoutRegion class"""
735
+
736
def __init__(
    self,
    bbox,
    blocks: Union[List[LayoutBlock], None] = None,
) -> None:
    """
    Initialize a LayoutRegion object.

    Args:
        bbox (List[int]): The bounding box of the region.
        blocks (List[LayoutBlock]): A list of blocks that belong to this
            region. ``None`` (the default) means "no blocks"; a fresh empty
            list is used so that no list object is shared between calls.
    """
    super().__init__("region", bbox, content="")
    self.bbox = bbox
    self.block_map = {}
    self.direction = "horizontal"
    # Per-category lists of indices into block_map.
    self.doc_title_block_idxes = []
    self.paragraph_title_block_idxes = []
    self.vision_block_idxes = []
    self.unordered_block_idxes = []
    self.vision_title_block_idxes = []
    self.normal_text_block_idxes = []
    self.euclidean_distance = float(np.inf)
    self.header_block_idxes = []
    self.footer_block_idxes = []
    # Placeholder text metrics; init_region_info_from_layout overwrites
    # text_line_width / text_line_height below.
    self.text_line_width = 20
    self.text_line_height = 10
    self.num_of_lines = 10
    self.init_region_info_from_layout([] if blocks is None else blocks)
    self.update_euclidean_distance()
766
+
767
def init_region_info_from_layout(self, blocks: List[LayoutBlock]) -> None:
    """Populate the region's bookkeeping from its blocks.

    Every block is stored in ``self.block_map`` under its position, gets
    that position written to ``block.index``, and is filed into the first
    matching category list. Blocks matching no category are normal text;
    only those contribute to the region direction and to the average text
    line width / height.

    Args:
        blocks (List[LayoutBlock]): A list of blocks that belong to this region.
    Returns:
        None
    """
    # Checked in this order; the first category containing the label wins.
    category_buckets = (
        ("header_labels", self.header_block_idxes),
        ("doc_title_labels", self.doc_title_block_idxes),
        ("paragraph_title_labels", self.paragraph_title_block_idxes),
        ("vision_labels", self.vision_block_idxes),
        ("vision_title_labels", self.vision_title_block_idxes),
        ("footer_labels", self.footer_block_idxes),
        ("unordered_labels", self.unordered_block_idxes),
    )

    horizontal_count = 0
    line_heights = []
    line_widths = []
    for idx, block in enumerate(blocks):
        self.block_map[idx] = block
        block.index = idx
        for category, bucket in category_buckets:
            if block.label in BLOCK_LABEL_MAP[category]:
                bucket.append(idx)
                break
        else:
            # No category matched: treat the block as normal text.
            self.normal_text_block_idxes.append(idx)
            line_heights.append(block.text_line_height)
            line_widths.append(block.text_line_width)
            if block.direction == "horizontal":
                horizontal_count += 1

    # Horizontal wins ties, including the case of no normal text blocks.
    if horizontal_count >= len(self.normal_text_block_idxes) * 0.5:
        direction = "horizontal"
    else:
        direction = "vertical"
    self.update_direction(direction)

    self.text_line_width = np.mean(line_widths) if line_widths else 20
    self.text_line_height = np.mean(line_heights) if line_heights else 10
814
+
815
def update_euclidean_distance(self):
    """Store the smallest distance from any block to the reference point.

    For a horizontal region the reference point is ``(0, 0)`` and each
    block is measured from ``(bbox[0], bbox[1])``. Otherwise the reference
    point is ``(self.bbox[2], 0)`` and each block is measured from
    ``(bbox[2], bbox[1])``. With no blocks the distance is set to 0.
    """
    if self.direction == "horizontal":
        x_idx, ref_point = 0, (0, 0)
    else:
        x_idx, ref_point = 2, (self.bbox[2], 0)

    distances = [
        caculate_euclidean_dist((block.bbox[x_idx], block.bbox[1]), ref_point)
        for block in self.block_map.values()
    ]
    self.euclidean_distance = min(distances) if distances else 0
831
+
832
def update_direction(self, direction=None):
    """
    Update the direction of the layout region.

    Delegates to the parent ``update_direction`` first, then records which
    bbox indices lie along the (secondary) direction and the centre
    coordinate of the bbox on each of the two axes.

    Args:
        direction (str): The new direction of the layout region.
    """
    super().update_direction(direction=direction)

    # bbox is indexed as [0, 1, 2, 3]; horizontal uses (0, 2) as the main
    # axis and (1, 3) as the secondary axis, vertical the other way round.
    if self.direction == "horizontal":
        main_axis, cross_axis, cross_name = (0, 2), (1, 3), "vertical"
    else:
        main_axis, cross_axis, cross_name = (1, 3), (0, 2), "horizontal"

    self.direction_start_index, self.direction_end_index = main_axis
    (
        self.secondary_direction_start_index,
        self.secondary_direction_end_index,
    ) = cross_axis
    self.secondary_direction = cross_name

    main_sum = (
        self.bbox[self.direction_start_index] + self.bbox[self.direction_end_index]
    )
    self.direction_center_coordinate = main_sum / 2
    cross_sum = (
        self.bbox[self.secondary_direction_start_index]
        + self.bbox[self.secondary_direction_end_index]
    )
    self.secondary_direction_center_coordinate = cross_sum / 2