paddlex 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +1 -1
- paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
- paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
- paddlex/configs/pipelines/OCR.yaml +7 -6
- paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
- paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
- paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
- paddlex/configs/pipelines/doc_understanding.yaml +1 -1
- paddlex/configs/pipelines/formula_recognition.yaml +2 -2
- paddlex/configs/pipelines/layout_parsing.yaml +3 -2
- paddlex/configs/pipelines/seal_recognition.yaml +1 -0
- paddlex/configs/pipelines/table_recognition.yaml +2 -1
- paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
- paddlex/hpip_links.html +20 -20
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
- paddlex/inference/common/result/mixin.py +19 -12
- paddlex/inference/models/base/predictor/base_predictor.py +2 -8
- paddlex/inference/models/common/static_infer.py +29 -73
- paddlex/inference/models/common/tokenizer/__init__.py +2 -0
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
- paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
- paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
- paddlex/inference/models/common/tokenizer/vocab.py +7 -7
- paddlex/inference/models/common/ts/funcs.py +19 -8
- paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
- paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
- paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
- paddlex/inference/models/common/vlm/generation/utils.py +1 -1
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
- paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
- paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
- paddlex/inference/models/doc_vlm/predictor.py +79 -24
- paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/processors/common.py +189 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
- paddlex/inference/models/formula_recognition/predictor.py +8 -2
- paddlex/inference/models/formula_recognition/processors.py +90 -77
- paddlex/inference/models/formula_recognition/result.py +28 -27
- paddlex/inference/models/image_feature/processors.py +3 -4
- paddlex/inference/models/keypoint_detection/predictor.py +3 -0
- paddlex/inference/models/object_detection/predictor.py +2 -0
- paddlex/inference/models/object_detection/processors.py +28 -3
- paddlex/inference/models/object_detection/utils.py +2 -0
- paddlex/inference/models/table_structure_recognition/result.py +0 -10
- paddlex/inference/models/text_detection/predictor.py +8 -0
- paddlex/inference/models/text_detection/processors.py +44 -10
- paddlex/inference/models/text_detection/result.py +0 -10
- paddlex/inference/models/text_recognition/result.py +1 -1
- paddlex/inference/pipelines/__init__.py +9 -5
- paddlex/inference/pipelines/_parallel.py +172 -0
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
- paddlex/inference/pipelines/base.py +14 -4
- paddlex/inference/pipelines/components/faisser.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
- paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
- paddlex/inference/pipelines/formula_recognition/result.py +1 -11
- paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
- paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +832 -260
- paddlex/inference/pipelines/layout_parsing/result.py +4 -17
- paddlex/inference/pipelines/layout_parsing/result_v2.py +259 -245
- paddlex/inference/pipelines/layout_parsing/setting.py +88 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +391 -2028
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1199 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +615 -0
- paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
- paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ocr/pipeline.py +127 -70
- paddlex/inference/pipelines/ocr/result.py +21 -18
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +6 -6
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
- paddlex/inference/pipelines/table_recognition/result.py +1 -1
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
- paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
- paddlex/inference/serving/basic_serving/_app.py +46 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
- paddlex/inference/serving/infra/utils.py +20 -22
- paddlex/inference/serving/schemas/formula_recognition.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
- paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
- paddlex/inference/serving/schemas/seal_recognition.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +2 -6
- paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
- paddlex/inference/utils/hpi.py +30 -16
- paddlex/inference/utils/hpi_model_info_collection.json +666 -162
- paddlex/inference/utils/io/readers.py +12 -12
- paddlex/inference/utils/misc.py +20 -0
- paddlex/inference/utils/mkldnn_blocklist.py +59 -0
- paddlex/inference/utils/official_models.py +140 -5
- paddlex/inference/utils/pp_option.py +74 -9
- paddlex/model.py +2 -2
- paddlex/modules/__init__.py +1 -1
- paddlex/modules/anomaly_detection/evaluator.py +2 -2
- paddlex/modules/base/__init__.py +1 -1
- paddlex/modules/base/evaluator.py +5 -5
- paddlex/modules/base/trainer.py +1 -1
- paddlex/modules/doc_vlm/dataset_checker.py +2 -2
- paddlex/modules/doc_vlm/evaluator.py +2 -2
- paddlex/modules/doc_vlm/exportor.py +2 -2
- paddlex/modules/doc_vlm/model_list.py +1 -1
- paddlex/modules/doc_vlm/trainer.py +2 -2
- paddlex/modules/face_recognition/evaluator.py +2 -2
- paddlex/modules/formula_recognition/evaluator.py +5 -2
- paddlex/modules/formula_recognition/model_list.py +3 -0
- paddlex/modules/formula_recognition/trainer.py +3 -0
- paddlex/modules/general_recognition/evaluator.py +1 -1
- paddlex/modules/image_classification/evaluator.py +2 -2
- paddlex/modules/image_classification/model_list.py +1 -0
- paddlex/modules/instance_segmentation/evaluator.py +1 -1
- paddlex/modules/keypoint_detection/evaluator.py +1 -1
- paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
- paddlex/modules/multilabel_classification/evaluator.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/object_detection/evaluator.py +2 -2
- paddlex/modules/object_detection/model_list.py +2 -0
- paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
- paddlex/modules/semantic_segmentation/evaluator.py +2 -2
- paddlex/modules/table_recognition/evaluator.py +2 -2
- paddlex/modules/text_detection/evaluator.py +2 -2
- paddlex/modules/text_detection/model_list.py +2 -0
- paddlex/modules/text_recognition/evaluator.py +2 -2
- paddlex/modules/text_recognition/model_list.py +2 -0
- paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/ts_classification/evaluator.py +2 -2
- paddlex/modules/ts_forecast/evaluator.py +2 -2
- paddlex/modules/video_classification/evaluator.py +2 -2
- paddlex/modules/video_detection/evaluator.py +2 -2
- paddlex/ops/__init__.py +8 -5
- paddlex/paddlex_cli.py +19 -13
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
- paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
- paddlex/repo_apis/base/config.py +1 -1
- paddlex/repo_manager/core.py +3 -3
- paddlex/repo_manager/meta.py +6 -2
- paddlex/repo_manager/repo.py +17 -16
- paddlex/utils/custom_device_list.py +26 -2
- paddlex/utils/deps.py +3 -3
- paddlex/utils/device.py +5 -13
- paddlex/utils/env.py +4 -0
- paddlex/utils/flags.py +11 -4
- paddlex/utils/fonts/__init__.py +34 -4
- paddlex/utils/misc.py +1 -1
- paddlex/utils/subclass_register.py +2 -2
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/METADATA +349 -208
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/RECORD +240 -211
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/WHEEL +1 -1
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/entry_points.txt +1 -0
- {paddlex-3.0.0rc1.dist-info/licenses → paddlex-3.0.2.dist-info}/LICENSE +0 -0
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,859 @@
|
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
from typing import Any, List, Union
|
15
|
+
|
16
|
+
import numpy as np
|
17
|
+
|
18
|
+
from .setting import BLOCK_LABEL_MAP, LINE_SETTINGS
|
19
|
+
from .utils import (
|
20
|
+
caculate_euclidean_dist,
|
21
|
+
calculate_projection_overlap_ratio,
|
22
|
+
is_english_letter,
|
23
|
+
is_non_breaking_punctuation,
|
24
|
+
is_numeric,
|
25
|
+
)
|
26
|
+
|
27
|
+
__all__ = [
|
28
|
+
"TextSpan",
|
29
|
+
"TextLine",
|
30
|
+
"LayoutBlock",
|
31
|
+
"LayoutRegion",
|
32
|
+
]
|
33
|
+
|
34
|
+
|
35
|
+
class TextSpan(object):
    """A single recognized span: its box, its text and its category label."""

    def __init__(self, box, text, label):
        """
        Store the geometry, content and category of one span.

        Args:
            box (list): The bounding box of the text span.
            text (str): The text content of the text span.
            label: The label of the text span. Other code in this module
                compares it against strings such as "text" and "formula".
        """
        self.box, self.text, self.label = box, text, label

    def __str__(self) -> str:
        # Formatting (rather than returning self.text directly) keeps the
        # result a str even when the stored text is not one.
        return "{}".format(self.text)

    def __repr__(self) -> str:
        # Deliberately the same rendering as __str__.
        return "{}".format(self.text)
|
56
|
+
|
57
|
+
|
58
|
+
class TextLine(object):
    """A line of text made of one or more spans sharing a reading direction."""

    def __init__(
        self,
        spans: Union[List["TextSpan"], None] = None,
        direction="horizontal",
    ):
        """
        Initialize a TextLine object.

        Args:
            spans (List[TextSpan], optional): Spans that make up the line. A
                list passed by the caller is stored by reference; when omitted
                (or None) every instance gets its own fresh empty list.
            direction (str): The direction of the text line. Defaults to "horizontal".
        """
        # A literal `[]` default would be shared by every instance and then
        # mutated through add_span(), leaking spans between lines.
        self.spans = [] if spans is None else spans
        self.direction = direction
        self.region_box = self.get_region_box()
        self.need_new_line = False

    @property
    def labels(self):
        """Labels of the spans, in span order."""
        return [span.label for span in self.spans]

    @property
    def boxes(self):
        """Boxes of the spans, in span order."""
        return [span.box for span in self.spans]

    @property
    def height(self):
        """Extent of region_box across the line (y for horizontal, x for vertical)."""
        start_idx = 1 if self.direction == "horizontal" else 0
        end_idx = 3 if self.direction == "horizontal" else 2
        return abs(self.region_box[end_idx] - self.region_box[start_idx])

    @property
    def width(self):
        """Extent of region_box along the line (x for horizontal, y for vertical)."""
        start_idx = 0 if self.direction == "horizontal" else 1
        end_idx = 2 if self.direction == "horizontal" else 3
        return abs(self.region_box[end_idx] - self.region_box[start_idx])

    def __str__(self) -> str:
        return " ".join(str(span.text) for span in self.spans) + "\n"

    def __repr__(self) -> str:
        return " ".join(str(span.text) for span in self.spans) + "\n"

    def add_span(self, span: Union["TextSpan", List["TextSpan"]]):
        """
        Add a span to the text line and refresh the region box.

        Args:
            span (Union[TextSpan, List[TextSpan]]): A single TextSpan object or a list of TextSpan objects.
        """
        if isinstance(span, list):
            self.spans.extend(span)
        else:
            self.spans.append(span)
        self.region_box = self.get_region_box()

    def get_region_box(self):
        """
        Get the region box of the text line.

        Returns:
            list: [x_min, y_min, x_max, y_max] enclosing every span box, or
            None when the line has no spans.
        """
        if not self.spans:
            return None

        # Seed the extrema with the first span's box, then widen.
        x_min, y_min, x_max, y_max = self.spans[0].box

        for span in self.spans:
            x_min = min(x_min, span.box[0])
            y_min = min(y_min, span.box[1])
            x_max = max(x_max, span.box[2])
            y_max = max(y_max, span.box[3])

        return [x_min, y_min, x_max, y_max]

    def get_texts(
        self,
        block_label: str,
        block_text_width: int,
        block_start_coordinate: int,
        block_stop_coordinate: int,
        ori_image,
        text_rec_model=None,
        text_rec_score_thresh=None,
    ):
        """
        Get the text of the text line.

        Sorts the spans in place. When the line contains a formula span and
        splitting by projection changes the number of spans, every "text" span
        is re-recognized from its crop of `ori_image` and `self.spans` is
        replaced by the surviving spans.

        Args:
            block_label (str): The label of the block.
            block_text_width (int): The width of the block.
            block_start_coordinate (int): The starting coordinate of the block.
            block_stop_coordinate (int): The stopping coordinate of the block.
            ori_image (np.ndarray): The original image.
            text_rec_model (Any): The text recognition model. It is called with
                a one-element list of crops and must be provided whenever
                re-recognition is triggered.
            text_rec_score_thresh (float): The text recognition score threshold.
                When None, no re-recognized span is dropped.

        Returns:
            str: The text of the text line ("" for a line without spans).
        """
        if not self.spans:
            # Nothing to format; height/format_line would otherwise fail.
            return ""

        span_box_start_index = 0 if self.direction == "horizontal" else 1
        lines_start_index = 1 if self.direction == "horizontal" else 3
        self.spans.sort(
            key=lambda span: (
                span.box[span_box_start_index] // 2,
                (
                    span.box[lines_start_index]
                    if self.direction == "horizontal"
                    else -span.box[lines_start_index]
                ),
            )
        )
        if "formula" in self.labels:
            sort_index = 0 if self.direction == "horizontal" else 1
            splited_spans = self.split_boxes_by_projection()
            if len(self.spans) != len(splited_spans):
                splited_spans.sort(key=lambda span: span.box[sort_index])
                new_spans = []
                for span in splited_spans:
                    bbox = span.box
                    if span.label == "text":
                        crop_img = ori_image[
                            int(bbox[1]) : int(bbox[3]),
                            int(bbox[0]) : int(bbox[2]),
                        ]
                        crop_img_rec_res = next(text_rec_model([crop_img]))
                        crop_img_rec_score = crop_img_rec_res["rec_score"]
                        crop_img_rec_text = crop_img_rec_res["rec_text"]
                        span.text = crop_img_rec_text
                        # Comparing against the default None would raise
                        # TypeError, so only filter when a threshold is set.
                        if (
                            text_rec_score_thresh is not None
                            and crop_img_rec_score < text_rec_score_thresh
                        ):
                            continue
                    new_spans.append(span)
                self.spans = new_spans
        line_text = self.format_line(
            block_text_width,
            block_start_coordinate,
            block_stop_coordinate,
            line_gap_limit=self.height * 1.5,
            block_label=block_label,
        )
        return line_text

    def is_projection_contained(self, box_a, box_b, start_idx, end_idx):
        """Check if box_a completely contains box_b along the axis selected by start_idx/end_idx."""
        return box_a[start_idx] <= box_b[start_idx] and box_a[end_idx] >= box_b[end_idx]

    def split_boxes_by_projection(self, offset=1e-5):
        """
        Check if there is any complete containment along the line direction
        between the bounding boxes and split the containing box accordingly.

        The box of a containing span may be modified in place while it is
        being split.

        Args:
            offset (float): A small offset value to ensure that the split boxes are not too close to the original boxes.
        Returns:
            A new list of spans, including split spans, which keep the `text` and `label` of the span they were cut from.
        """

        new_spans = []
        if self.direction == "horizontal":
            projection_start_index, projection_end_index = 0, 2
        else:
            projection_start_index, projection_end_index = 1, 3

        for i in range(len(self.spans)):
            span = self.spans[i]
            is_split = False
            # j starts at i, so every span is first compared with itself.
            for j in range(i, len(self.spans)):
                box_b = self.spans[j].box
                box_a, text, label = span.box, span.text, span.label
                if self.is_projection_contained(
                    box_a, box_b, projection_start_index, projection_end_index
                ):
                    is_split = True
                    # Part of box_a that lies before box_b.
                    if box_a[projection_start_index] < box_b[projection_start_index]:
                        w = (
                            box_b[projection_start_index]
                            - offset
                            - box_a[projection_start_index]
                        )
                        if w > 1:
                            new_bbox = box_a.copy()
                            new_bbox[projection_end_index] = (
                                box_b[projection_start_index] - offset
                            )
                            new_spans.append(
                                TextSpan(
                                    box=np.array(new_bbox),
                                    text=text,
                                    label=label,
                                )
                            )
                    # Part of box_a that lies after box_b; it becomes the
                    # span carried into the remaining comparisons.
                    if box_a[projection_end_index] > box_b[projection_end_index]:
                        w = (
                            box_a[projection_end_index]
                            - box_b[projection_end_index]
                            + offset
                        )
                        if w > 1:
                            box_a[projection_start_index] = (
                                box_b[projection_end_index] + offset
                            )
                            span = TextSpan(
                                box=np.array(box_a),
                                text=text,
                                label=label,
                            )
                if j == len(self.spans) - 1 and is_split:
                    new_spans.append(span)
            if not is_split:
                new_spans.append(span)

        return new_spans

    def format_line(
        self,
        block_text_width: int,
        block_start_coordinate: int,
        block_stop_coordinate: int,
        line_gap_limit: int = 10,
        block_label: str = "text",
    ) -> str:
        """
        Format a line of text spans based on layout constraints.

        Side effects: formula spans inside a non-formula block get their text
        wrapped in `$...$`, and `self.need_new_line` may be set to True.

        Args:
            block_text_width (int): The width of the block.
            block_start_coordinate (int): The starting coordinate of the block.
            block_stop_coordinate (int): The stopping coordinate of the block.
            line_gap_limit (int): The limit for the number of pixels after the last span that should be considered part of the last line. Default is 10.
            block_label (str): The label associated with the entire block. Default is 'text'.
        Returns:
            str: Formatted line of text ("" when the line has no spans or no text).
        """
        if not self.spans:
            # Indexing spans[0] / spans[-1] below would raise IndexError.
            return ""

        first_span_box = self.spans[0].box
        last_span_box = self.spans[-1].box

        line_text = ""
        for span in self.spans:
            if span.label == "formula" and block_label != "formula":
                formula_rec = span.text
                if not formula_rec.startswith("$") and not formula_rec.endswith("$"):
                    if len(self.spans) > 1:
                        span.text = f"${span.text}$"
                    else:
                        span.text = f"\n${span.text}$"
            line_text += span.text
            # `and` binds tighter than `or`; the parentheses only make the
            # original grouping explicit.
            if (
                len(span.text) > 0 and is_english_letter(line_text[-1])
            ) or span.label == "formula":
                line_text += " "

        if self.direction == "horizontal":
            text_stop_index = 2
        else:
            text_stop_index = 3

        if line_text.endswith(" "):
            line_text = line_text[:-1]

        if len(line_text) == 0:
            return ""

        last_char = line_text[-1]

        if (
            not is_english_letter(last_char)
            and not is_non_breaking_punctuation(last_char)
            and not is_numeric(last_char)
        ) or (
            block_stop_coordinate - last_span_box[text_stop_index]
            > block_text_width * 0.3
        ):
            if (
                self.direction == "horizontal"
                and block_stop_coordinate - last_span_box[text_stop_index]
                > line_gap_limit
            ) or (
                self.direction == "vertical"
                and (
                    block_stop_coordinate - last_span_box[text_stop_index]
                    > line_gap_limit
                    or first_span_box[1] - block_start_coordinate > line_gap_limit
                )
            ):
                self.need_new_line = True

        # A trailing hyphen is dropped and the line is returned as-is.
        if line_text.endswith("-"):
            line_text = line_text[:-1]
            return line_text

        if (len(line_text) > 0 and is_english_letter(last_char)) or line_text.endswith(
            "$"
        ):
            line_text += " "
        if (
            len(line_text) > 0
            and not is_english_letter(last_char)
            and not is_numeric(last_char)
        ) or self.direction == "vertical":
            if (
                block_stop_coordinate - last_span_box[text_stop_index]
                > block_text_width * 0.3
                and len(line_text) > 0
                and not is_non_breaking_punctuation(last_char)
            ):
                line_text += "\n"
                self.need_new_line = True
        # NOTE(review): this `elif` is paired with the outer `if` above; the
        # nesting was reconstructed from a whitespace-stripped rendering, so
        # confirm against the upstream file.
        elif (
            block_stop_coordinate - last_span_box[text_stop_index]
            > (block_stop_coordinate - block_start_coordinate) * 0.5
        ):
            line_text += "\n"
            self.need_new_line = True

        return line_text
|
377
|
+
|
378
|
+
|
379
|
+
class LayoutBlock(object):
|
380
|
+
"""Layout Block Class"""
|
381
|
+
|
382
|
+
def __init__(self, label, bbox, content="") -> None:
|
383
|
+
"""
|
384
|
+
Initialize a LayoutBlock object.
|
385
|
+
|
386
|
+
Args:
|
387
|
+
label (str): Label assigned to the block.
|
388
|
+
bbox (list): Bounding box coordinates of the block.
|
389
|
+
content (str, optional): Content of the block. Defaults to an empty string.
|
390
|
+
"""
|
391
|
+
self.label = label
|
392
|
+
self.order_label = None
|
393
|
+
self.bbox = list(map(int, bbox))
|
394
|
+
self.content = content
|
395
|
+
self.seg_start_coordinate = float("inf")
|
396
|
+
self.seg_end_coordinate = float("-inf")
|
397
|
+
self.width = bbox[2] - bbox[0]
|
398
|
+
self.height = bbox[3] - bbox[1]
|
399
|
+
self.area = float(self.width) * float(self.height)
|
400
|
+
self.num_of_lines = 1
|
401
|
+
self.image = None
|
402
|
+
self.index = None
|
403
|
+
self.order_index = None
|
404
|
+
self.text_line_width = 1
|
405
|
+
self.text_line_height = 1
|
406
|
+
self.child_blocks = []
|
407
|
+
self.update_direction()
|
408
|
+
|
409
|
+
def __str__(self) -> str:
|
410
|
+
_str = f"\n\n#################\nindex:\t{self.index}\nlabel:\t{self.label}\nregion_label:\t{self.order_label}\nbbox:\t{self.bbox}\ncontent:\t{self.content}\n#################"
|
411
|
+
return _str
|
412
|
+
|
413
|
+
def __repr__(self) -> str:
|
414
|
+
_str = f"\n\n#################\nindex:\t{self.index}\nlabel:\t{self.label}\nregion_label:\t{self.order_label}\nbbox:\t{self.bbox}\ncontent:\t{self.content}\n#################"
|
415
|
+
return _str
|
416
|
+
|
417
|
+
def to_dict(self) -> dict:
    """Return the instance's own attribute dictionary (the live mapping, not a copy)."""
    return vars(self)
|
419
|
+
|
420
|
+
def update_direction(self, direction=None) -> None:
    """
    Set the block's direction and refresh the attributes derived from it.

    Args:
        direction (str, optional): Direction of the block. Any falsy value
            (including the default None) means the direction is derived from
            the bounding box via get_bbox_direction().
    """
    self.direction = direction if direction else self.get_bbox_direction()
    self.update_direction_info()
|
431
|
+
|
432
|
+
def update_direction_info(self) -> None:
    """Recompute the direction-dependent side lengths and coordinates from direction, width, height and bbox."""
    horizontal = self.direction == "horizontal"
    # bbox index of the start coordinate along the main axis / the cross axis;
    # the matching end coordinate sits two slots further on.
    main, cross = (0, 1) if horizontal else (1, 0)

    self.secondary_direction = "vertical" if horizontal else "horizontal"
    if horizontal:
        self.short_side_length, self.long_side_length = self.height, self.width
    else:
        self.short_side_length, self.long_side_length = self.width, self.height
    self.start_coordinate = self.bbox[main]
    self.end_coordinate = self.bbox[main + 2]
    self.secondary_direction_start_coordinate = self.bbox[cross]
    self.secondary_direction_end_coordinate = self.bbox[cross + 2]
|
450
|
+
|
451
|
+
def append_child_block(self, child_block) -> None:
    """
    Attach a child block and grow this block's bbox to enclose it.

    The first attachment saves a copy of the current bbox as `ori_bbox`.
    Any blocks already attached to the child are detached from it and
    re-attached here, directly after the child.

    Args:
        child_block (LayoutBlock): Child block to be added.
    Returns:
        None
    """
    if not self.child_blocks:
        # Keep the pre-merge box so it can be restored later.
        self.ori_bbox = self.bbox.copy()

    left, top, right, bottom = self.bbox
    c_left, c_top, c_right, c_bottom = child_block.bbox
    self.bbox = (
        min(left, c_left),
        min(top, c_top),
        max(right, c_right),
        max(bottom, c_bottom),
    )
    self.update_direction_info()

    # Flatten: the child's own children move up to this block.
    descendants = child_block.get_child_blocks() if child_block.child_blocks else []
    self.child_blocks.extend([child_block, *descendants])
|
476
|
+
|
477
|
+
def get_child_blocks(self) -> list:
    """Detach and return all child blocks, restoring the saved bbox.

    ``self.bbox`` is reset to ``self.ori_bbox`` and ``self.child_blocks``
    is emptied; the returned list is a copy of the former children.
    """
    self.bbox = self.ori_bbox
    detached, self.child_blocks = self.child_blocks.copy(), []
    return detached
|
483
|
+
|
484
|
+
def get_centroid(self) -> tuple:
    """Return the ``(x, y)`` midpoint of the block's bounding box."""
    left, top, right, bottom = self.bbox
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2
    return center_x, center_y
|
489
|
+
|
490
|
+
def get_bbox_direction(self, direction_ratio: float = 1.0) -> str:
    """
    Classify the block as horizontal or vertical from its width and height.

    Args:
        direction_ratio (float): Factor applied to the width before it is
            compared with the height. Default is 1.0.

    Returns:
        str: "horizontal" when ``width * direction_ratio >= height``,
            otherwise "vertical".
    """
    if self.width * direction_ratio >= self.height:
        return "horizontal"
    return "vertical"
|
503
|
+
|
504
|
+
def calculate_text_line_direction(
    self, bboxes: List[List[int]], direction_ratio: float = 1.5
) -> str:
    """
    Calculate the direction of the text based on the bounding boxes.

    Each box votes "horizontal" when ``width * direction_ratio >= height``.
    The result is "horizontal" when at least half of the boxes vote that
    way; an empty ``bboxes`` therefore also yields "horizontal".

    Args:
        bboxes (list): A list of bounding boxes, each ``[x1, y1, x2, y2]``.
        direction_ratio (float): Ratio for determining direction. Default is 1.5.

    Returns:
        str: "horizontal" or "vertical".

    Raises:
        ValueError: If any bounding box does not have exactly 4 elements.
    """
    horizontal_box_num = 0
    for bbox in bboxes:
        if len(bbox) != 4:
            raise ValueError(
                "Invalid bounding box format. Expected a list of length 4."
            )
        x1, y1, x2, y2 = bbox
        if (x2 - x1) * direction_ratio >= (y2 - y1):
            horizontal_box_num += 1

    return "horizontal" if horizontal_box_num >= len(bboxes) * 0.5 else "vertical"
|
530
|
+
|
531
|
+
def group_boxes_into_lines(
    self, ocr_rec_res, line_height_iou_threshold
) -> List[TextLine]:
    """
    Cluster the OCR spans of this block into text lines.

    The block direction is first re-estimated from the boxes labelled
    "text" and stored through ``update_direction``. Spans are then sorted
    along the line-stacking axis and merged greedily: a span joins the
    current line when its projection overlap with the line's region box
    reaches the threshold, otherwise it starts a new line. The mean line
    height and width are stored on ``self.text_line_height`` and
    ``self.text_line_width`` (both 0 when no line remains).

    Args:
        ocr_rec_res (dict): The result of OCR recognition; the keys
            "boxes", "rec_texts" and "rec_labels" are read.
        line_height_iou_threshold (float): The minimum overlap ratio
            required for a span to join the current line.

    Returns:
        list: A list of TextLines.
    """
    boxes = ocr_rec_res["boxes"]
    texts = ocr_rec_res["rec_texts"]
    labels = ocr_rec_res["rec_labels"]

    # Only spans labelled "text" vote on the direction of the block.
    voting_boxes = [box for idx, box in enumerate(boxes) if labels[idx] == "text"]
    self.update_direction(self.calculate_text_line_direction(voting_boxes))

    spans = [TextSpan(box, text, label) for box, text, label in zip(boxes, texts, labels)]
    if not spans:
        return []

    # Order the spans along the axis on which the lines are stacked:
    # descending x1 for vertical text, ascending y1 otherwise.
    if self.direction == "vertical":
        spans.sort(key=lambda item: item.box[0], reverse=True)
        match_direction = "horizontal"
    else:
        spans.sort(key=lambda item: item.box[1], reverse=False)
        match_direction = "vertical"

    first_span, *remaining_spans = spans
    lines = []
    current_line = TextLine([first_span], direction=self.direction)
    for span in remaining_spans:
        overlap_ratio = calculate_projection_overlap_ratio(
            current_line.region_box, span.box, match_direction, mode="small"
        )
        if overlap_ratio >= line_height_iou_threshold:
            current_line.add_span(span)
            continue
        # Not enough overlap: close the current line and open a new one.
        lines.append(current_line)
        current_line = TextLine([span], direction=self.direction)
    lines.append(current_line)

    if lines and self.direction == "vertical":
        heights = np.array([text_line.height for text_line in lines])
        shortest = np.min(heights)
        tallest = np.max(heights)

        # Only filter when the tallest line is more than twice the shortest.
        if tallest > shortest * 2:
            normal_height_threshold = shortest * 1.1
            normal_count = np.sum(heights < normal_height_threshold)

            # When fewer than 40% of the lines are below the threshold,
            # keep only the lines whose height does not exceed it.
            if normal_count < len(lines) * 0.4:
                keep_mask = heights <= normal_height_threshold
                lines = [
                    text_line for text_line, keep in zip(lines, keep_mask) if keep
                ]

    # Record the average line size of the block.
    if lines:
        kept_heights = [text_line.height for text_line in lines]
        kept_widths = [text_line.width for text_line in lines]
        self.text_line_height = np.mean(kept_heights)
        self.text_line_width = np.mean(kept_widths)
    else:
        self.text_line_height = 0
        self.text_line_width = 0

    return lines
|
613
|
+
|
614
|
+
def update_text_content(
    self,
    image: list,
    ocr_rec_res: dict,
    text_rec_model: Any,
    text_rec_score_thresh: Union[float, None] = None,
) -> None:
    """
    Update the text content of the block based on the OCR result.

    The OCR spans are grouped into lines with ``group_boxes_into_lines``
    (which also updates ``self.direction``), each line is converted to text
    with ``TextLine.get_texts``, and the line texts are joined into
    ``self.content``. ``self.num_of_lines``, ``self.seg_start_coordinate``
    and ``self.seg_end_coordinate`` are updated along the way.

    Args:
        image (list): The input image; forwarded to ``get_texts`` as ``ori_image``.
        ocr_rec_res (dict): The result of OCR recognition; "rec_texts" and
            "boxes" are read here, the rest by ``group_boxes_into_lines``.
        text_rec_model (Any): The model used for text recognition; forwarded to ``get_texts``.
        text_rec_score_thresh (Union[float, None]): The score threshold for text recognition. If None, use the default setting.

    Returns:
        None
    """

    # Nothing recognized: the block gets empty content and nothing else is touched.
    if len(ocr_rec_res["rec_texts"]) == 0:
        self.content = ""
        return

    lines = self.group_boxes_into_lines(
        ocr_rec_res,
        LINE_SETTINGS.get("line_height_iou_threshold", 0.8),
    )

    # bbox indices of the start/end coordinate along the reading axis:
    # (0, 2) i.e. x1/x2 for horizontal text, (1, 3) i.e. y1/y3 otherwise
    coord_start_idx = 0 if self.direction == "horizontal" else 1
    coord_end_idx = coord_start_idx + 2

    # "reference" blocks take their extent from the OCR boxes themselves;
    # every other label uses the block's own bbox
    if self.label == "reference":
        rec_boxes = ocr_rec_res["boxes"]
        block_start = min([box[coord_start_idx] for box in rec_boxes])
        block_stop = max([box[coord_end_idx] for box in rec_boxes])
    else:
        block_start = self.bbox[coord_start_idx]
        block_stop = self.bbox[coord_end_idx]

    text_lines = []
    text_width_list = []
    need_new_line_num = 0

    for line_idx, line in enumerate(lines):
        line: TextLine = line
        text_width_list.append(line.width)
        # get text from line; block_text_width is the widest line seen so far
        line_text = line.get_texts(
            block_label=self.label,
            block_text_width=max(text_width_list),
            block_start_coordinate=block_start,
            block_stop_coordinate=block_stop,
            ori_image=image,
            text_rec_model=text_rec_model,
            text_rec_score_thresh=text_rec_score_thresh,
        )

        # count the lines flagged need_new_line (flag is read after get_texts)
        if line.need_new_line:
            need_new_line_num += 1

        # set segment start and end coordinate (always box[0] / box[2],
        # independent of the block direction)
        # NOTE(review): because of the elif, a block with a single line only
        # gets seg_start_coordinate set here; seg_end_coordinate keeps its
        # previous value. Confirm whether that is intended.
        if line_idx == 0:
            self.seg_start_coordinate = line.spans[0].box[0]
        elif line_idx == len(lines) - 1:
            self.seg_end_coordinate = line.spans[-1].box[2]

        text_lines.append(line_text)

    # per-label delimiter; labels without an entry fall back to ""
    delim = LINE_SETTINGS["delimiter_map"].get(self.label, "")

    if delim == "":
        # No fixed delimiter: concatenate the lines and decide line by line
        # whether a "\n" has to be inserted.
        content = ""
        pre_line_end = False
        last_char = ""
        for idx, line_text in enumerate(text_lines):
            if len(line_text) == 0:
                continue

            line: TextLine = lines[idx]
            if pre_line_end:
                # gap between the block start and the start of this line
                start_gep_len = line.region_box[coord_start_idx] - block_start
                # prepend a newline when the line starts with a large gap
                # (and the previous text did not end with a letter/digit),
                # or the gap exceeds 40% of the block extent, unless the
                # content already ends with a newline
                if (
                    (
                        start_gep_len > line.height * 1.5
                        and not is_english_letter(last_char)
                        and not is_numeric(last_char)
                    )
                    or start_gep_len > (block_stop - block_start) * 0.4
                ) and not content.endswith("\n"):
                    line_text = "\n" + line_text
            content += f"{line_text}"

            # last significant character of the line: skip one trailing space
            if len(line_text) > 2 and line_text.endswith(" "):
                last_char = line_text[-2]
            else:
                last_char = line_text[-1]
            # append a newline either when the line ends with a character that
            # is not a letter, non-breaking punctuation or digit and more than
            # half of the lines requested a new line, or when more than 60% of
            # the lines requested one
            if (
                len(line_text) > 0
                and not line_text.endswith("\n")
                and not is_english_letter(last_char)
                and not is_non_breaking_punctuation(last_char)
                and not is_numeric(last_char)
                and need_new_line_num > len(text_lines) * 0.5
            ) or need_new_line_num > len(text_lines) * 0.6:
                content += f"\n"
            # the line stops more than 30% of the block extent before the block
            # end; note that pre_line_end is never reset to False afterwards
            if (
                block_stop - line.region_box[coord_end_idx]
                > (block_stop - block_start) * 0.3
            ):
                pre_line_end = True
    else:
        content = delim.join(text_lines)

    self.content = content
    self.num_of_lines = len(text_lines)
|
731
|
+
|
732
|
+
|
733
|
+
class LayoutRegion(LayoutBlock):
|
734
|
+
"""LayoutRegion class"""
|
735
|
+
|
736
|
+
def __init__(
    self,
    bbox,
    blocks: Union[List[LayoutBlock], None] = None,
) -> None:
    """
    Initialize a LayoutRegion object.

    Args:
        bbox (List[int]): The bounding box of the region.
        blocks (List[LayoutBlock], optional): A list of blocks that belong
            to this region. ``None`` (the default) is treated as an empty
            list; a ``None`` sentinel is used instead of a mutable ``[]``
            default so that no list object is shared between calls.
    """
    super().__init__("region", bbox, content="")
    self.bbox = bbox
    self.block_map = {}
    self.direction = "horizontal"
    self.doc_title_block_idxes = []
    self.paragraph_title_block_idxes = []
    self.vision_block_idxes = []
    self.unordered_block_idxes = []
    self.vision_title_block_idxes = []
    self.normal_text_block_idxes = []
    self.euclidean_distance = float(np.inf)
    self.header_block_idxes = []
    self.footer_block_idxes = []
    # Placeholder statistics; text_line_width / text_line_height are
    # recomputed by init_region_info_from_layout below.
    self.text_line_width = 20
    self.text_line_height = 10
    self.num_of_lines = 10
    self.init_region_info_from_layout([] if blocks is None else blocks)
    self.update_euclidean_distance()
|
766
|
+
|
767
|
+
def init_region_info_from_layout(self, blocks: List[LayoutBlock]) -> None:
    """Index the given blocks and derive the region's direction and line size.

    Every block is stored in ``self.block_map`` under its position, gets
    that position as ``block.index``, and is filed into the index list of
    the first label group of ``BLOCK_LABEL_MAP`` that contains its label.
    Blocks matching no group count as normal text and contribute to the
    region's direction vote and average text line size.

    Args:
        blocks (List[LayoutBlock]): A list of blocks that belong to this region.
    Returns:
        None
    """
    # (label group key, index list to fill), checked in this priority order.
    label_buckets = (
        ("header_labels", self.header_block_idxes),
        ("doc_title_labels", self.doc_title_block_idxes),
        ("paragraph_title_labels", self.paragraph_title_block_idxes),
        ("vision_labels", self.vision_block_idxes),
        ("vision_title_labels", self.vision_title_block_idxes),
        ("footer_labels", self.footer_block_idxes),
        ("unordered_labels", self.unordered_block_idxes),
    )

    horizontal_votes = 0
    line_heights = []
    line_widths = []
    for idx, block in enumerate(blocks):
        self.block_map[idx] = block
        block.index = idx
        for group_key, bucket in label_buckets:
            if block.label in BLOCK_LABEL_MAP[group_key]:
                bucket.append(idx)
                break
        else:
            # No label group matched: treat the block as normal text.
            self.normal_text_block_idxes.append(idx)
            line_heights.append(block.text_line_height)
            line_widths.append(block.text_line_width)
            if block.direction == "horizontal":
                horizontal_votes += 1

    # The region is horizontal when at least half of its normal text
    # blocks are horizontal (which includes the case of no such blocks).
    if horizontal_votes >= len(self.normal_text_block_idxes) * 0.5:
        direction = "horizontal"
    else:
        direction = "vertical"
    self.update_direction(direction)

    # Fall back to the defaults 20 / 10 when there is no normal text block.
    self.text_line_width = np.mean(line_widths) if line_widths else 20
    self.text_line_height = np.mean(line_heights) if line_heights else 10
|
814
|
+
|
815
|
+
def update_euclidean_distance(self):
    """Store the smallest block-to-reference-point distance of the region.

    For a horizontal region the reference point is ``(0, 0)`` and each
    block is represented by its ``(x1, y1)`` corner; otherwise the
    reference point is ``(self.bbox[2], 0)`` and each block is represented
    by its ``(x2, y1)`` corner. With no blocks the distance is set to 0.
    """
    members: List[LayoutBlock] = list(self.block_map.values())
    if self.direction == "horizontal":
        ref_point = (0, 0)
        corner_x_idx = 0
    else:
        ref_point = (self.bbox[2], 0)
        corner_x_idx = 2

    distances = [
        caculate_euclidean_dist((member.bbox[corner_x_idx], member.bbox[1]), ref_point)
        for member in members
    ]
    self.euclidean_distance = min(distances) if distances else 0
|
831
|
+
|
832
|
+
def update_direction(self, direction=None):
    """
    Set the region's direction and refresh the axis bookkeeping.

    After delegating to the parent implementation, the bbox indices of
    the primary and secondary axes, the secondary direction name, and the
    center coordinate on each axis are recomputed from ``self.direction``.

    Args:
        direction (str): The new direction of the layout region.
    """
    super().update_direction(direction=direction)

    is_horizontal = self.direction == "horizontal"
    # bbox is (x1, y1, x2, y2): a horizontal region runs along x (0, 2)
    # with y (1, 3) as secondary axis; otherwise the axes are swapped.
    start, end, sec_start, sec_end = (0, 2, 1, 3) if is_horizontal else (1, 3, 0, 2)

    self.direction_start_index = start
    self.direction_end_index = end
    self.secondary_direction_start_index = sec_start
    self.secondary_direction_end_index = sec_end
    self.secondary_direction = "vertical" if is_horizontal else "horizontal"

    self.direction_center_coordinate = (self.bbox[start] + self.bbox[end]) / 2
    self.secondary_direction_center_coordinate = (
        self.bbox[sec_start] + self.bbox[sec_end]
    ) / 2
|