paddlex 3.0.0rc1__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +1 -1
- paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
- paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
- paddlex/configs/pipelines/OCR.yaml +7 -6
- paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
- paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
- paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
- paddlex/configs/pipelines/doc_understanding.yaml +1 -1
- paddlex/configs/pipelines/formula_recognition.yaml +2 -2
- paddlex/configs/pipelines/layout_parsing.yaml +3 -2
- paddlex/configs/pipelines/seal_recognition.yaml +1 -0
- paddlex/configs/pipelines/table_recognition.yaml +2 -1
- paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
- paddlex/hpip_links.html +20 -20
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
- paddlex/inference/common/result/mixin.py +19 -12
- paddlex/inference/models/base/predictor/base_predictor.py +2 -8
- paddlex/inference/models/common/static_infer.py +11 -59
- paddlex/inference/models/common/tokenizer/__init__.py +2 -0
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
- paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
- paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
- paddlex/inference/models/common/tokenizer/vocab.py +7 -7
- paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
- paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
- paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
- paddlex/inference/models/common/vlm/generation/utils.py +1 -1
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
- paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
- paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
- paddlex/inference/models/doc_vlm/predictor.py +79 -24
- paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/processors/common.py +189 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
- paddlex/inference/models/formula_recognition/predictor.py +7 -1
- paddlex/inference/models/formula_recognition/processors.py +92 -79
- paddlex/inference/models/formula_recognition/result.py +28 -27
- paddlex/inference/models/image_feature/processors.py +3 -4
- paddlex/inference/models/keypoint_detection/predictor.py +3 -0
- paddlex/inference/models/object_detection/predictor.py +2 -0
- paddlex/inference/models/object_detection/processors.py +28 -3
- paddlex/inference/models/object_detection/utils.py +2 -0
- paddlex/inference/models/table_structure_recognition/result.py +0 -10
- paddlex/inference/models/text_detection/predictor.py +8 -0
- paddlex/inference/models/text_detection/processors.py +44 -10
- paddlex/inference/models/text_detection/result.py +0 -10
- paddlex/inference/pipelines/__init__.py +9 -5
- paddlex/inference/pipelines/_parallel.py +172 -0
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
- paddlex/inference/pipelines/base.py +14 -4
- paddlex/inference/pipelines/components/faisser.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
- paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
- paddlex/inference/pipelines/formula_recognition/result.py +1 -11
- paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +893 -260
- paddlex/inference/pipelines/layout_parsing/result.py +4 -17
- paddlex/inference/pipelines/layout_parsing/result_v2.py +523 -245
- paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +565 -1998
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
- paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
- paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ocr/pipeline.py +127 -70
- paddlex/inference/pipelines/ocr/result.py +19 -16
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +5 -5
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
- paddlex/inference/pipelines/table_recognition/result.py +1 -1
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
- paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
- paddlex/inference/serving/infra/utils.py +20 -22
- paddlex/inference/serving/schemas/formula_recognition.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
- paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
- paddlex/inference/serving/schemas/seal_recognition.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +2 -6
- paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
- paddlex/inference/utils/hpi.py +8 -1
- paddlex/inference/utils/hpi_model_info_collection.json +81 -2
- paddlex/inference/utils/io/readers.py +12 -12
- paddlex/inference/utils/mkldnn_blocklist.py +25 -0
- paddlex/inference/utils/official_models.py +14 -0
- paddlex/inference/utils/pp_option.py +29 -8
- paddlex/model.py +2 -2
- paddlex/modules/__init__.py +1 -1
- paddlex/modules/anomaly_detection/evaluator.py +2 -2
- paddlex/modules/base/__init__.py +1 -1
- paddlex/modules/base/evaluator.py +5 -5
- paddlex/modules/base/trainer.py +1 -1
- paddlex/modules/doc_vlm/dataset_checker.py +2 -2
- paddlex/modules/doc_vlm/evaluator.py +2 -2
- paddlex/modules/doc_vlm/exportor.py +2 -2
- paddlex/modules/doc_vlm/model_list.py +1 -1
- paddlex/modules/doc_vlm/trainer.py +2 -2
- paddlex/modules/face_recognition/evaluator.py +2 -2
- paddlex/modules/formula_recognition/evaluator.py +5 -2
- paddlex/modules/formula_recognition/model_list.py +3 -0
- paddlex/modules/formula_recognition/trainer.py +3 -0
- paddlex/modules/general_recognition/evaluator.py +1 -1
- paddlex/modules/image_classification/evaluator.py +2 -2
- paddlex/modules/image_classification/model_list.py +1 -0
- paddlex/modules/instance_segmentation/evaluator.py +1 -1
- paddlex/modules/keypoint_detection/evaluator.py +1 -1
- paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
- paddlex/modules/multilabel_classification/evaluator.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/object_detection/evaluator.py +2 -2
- paddlex/modules/object_detection/model_list.py +2 -0
- paddlex/modules/semantic_segmentation/evaluator.py +2 -2
- paddlex/modules/table_recognition/evaluator.py +2 -2
- paddlex/modules/text_detection/evaluator.py +2 -2
- paddlex/modules/text_detection/model_list.py +2 -0
- paddlex/modules/text_recognition/evaluator.py +2 -2
- paddlex/modules/text_recognition/model_list.py +2 -0
- paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/ts_classification/evaluator.py +2 -2
- paddlex/modules/ts_forecast/evaluator.py +2 -2
- paddlex/modules/video_classification/evaluator.py +2 -2
- paddlex/modules/video_detection/evaluator.py +2 -2
- paddlex/ops/__init__.py +2 -2
- paddlex/paddlex_cli.py +19 -13
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
- paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
- paddlex/repo_apis/base/config.py +1 -1
- paddlex/repo_manager/core.py +3 -3
- paddlex/repo_manager/meta.py +6 -2
- paddlex/repo_manager/repo.py +17 -16
- paddlex/utils/custom_device_list.py +26 -2
- paddlex/utils/deps.py +1 -1
- paddlex/utils/device.py +15 -8
- paddlex/utils/env.py +4 -0
- paddlex/utils/flags.py +2 -4
- paddlex/utils/fonts/__init__.py +34 -4
- paddlex/utils/misc.py +1 -1
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/METADATA +52 -56
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/RECORD +233 -206
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/licenses/LICENSE +0 -0
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1144 @@
|
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from typing import List, Optional, Tuple
|
16
|
+
|
17
|
+
import numpy as np
|
18
|
+
|
19
|
+
from ..result_v2 import LayoutParsingBlock, LayoutParsingRegion
|
20
|
+
from ..setting import BLOCK_LABEL_MAP, XYCUT_SETTINGS
|
21
|
+
from ..utils import calculate_projection_overlap_ratio
|
22
|
+
|
23
|
+
|
24
|
+
def get_nearest_edge_distance(
    bbox1: List[int],
    bbox2: List[int],
    weight: Optional[List[float]] = None,
) -> float:
    """
    Calculate the weighted nearest edge distance between two bounding boxes.

    Args:
        bbox1 (list): The bounding box coordinates [x1, y1, x2, y2] of the input object.
        bbox2 (list): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
        weight (list, optional): Directional weights [left, right, up, down].
            ``weight[0]`` is applied when ``bbox1`` ends before ``bbox2`` starts on
            the x-axis (``x2 < x1'``), ``weight[1]`` otherwise; ``weight[2]`` is
            applied when ``y2 < y1'``, ``weight[3]`` otherwise.
            Defaults to [1.0, 1.0, 1.0, 1.0].

    Returns:
        float: 0.0 when both projection overlap ratios are positive; otherwise the
        sum of the weighted x gap and the weighted y gap, where a gap is only
        counted on a direction whose overlap ratio is exactly 0.
    """
    # Build the default per call instead of sharing one mutable list object
    # across every invocation.
    if weight is None:
        weight = [1.0, 1.0, 1.0, 1.0]

    x1, y1, x2, y2 = bbox1
    x1_prime, y1_prime, x2_prime, y2_prime = bbox2

    horizontal_iou = calculate_projection_overlap_ratio(bbox1, bbox2, "horizontal")
    vertical_iou = calculate_projection_overlap_ratio(bbox1, bbox2, "vertical")

    # Both ratios positive: the boxes are treated as touching/overlapping.
    if horizontal_iou > 0 and vertical_iou > 0:
        return 0.0

    min_x_distance, min_y_distance = 0, 0
    if horizontal_iou == 0:
        # Smallest gap between facing vertical edges, scaled by the side weight.
        min_x_distance = min(abs(x1 - x2_prime), abs(x2 - x1_prime)) * (
            weight[0] if x2 < x1_prime else weight[1]
        )
    if vertical_iou == 0:
        # Smallest gap between facing horizontal edges, scaled by the side weight.
        min_y_distance = min(abs(y1 - y2_prime), abs(y2 - y1_prime)) * (
            weight[2] if y2 < y1_prime else weight[3]
        )

    return min_x_distance + min_y_distance
|
57
|
+
|
58
|
+
|
59
|
+
def projection_by_bboxes(boxes: np.ndarray, axis: int) -> np.ndarray:
    """
    Build a 1D coverage histogram of bounding boxes along one axis.

    Args:
        boxes: A (N, 4) array of boxes given as [x_min, y_min, x_max, y_max].
        axis: 0 to project onto the x-axis, 1 to project onto the y-axis.

    Returns:
        A 1D integer array. For every box, the slice ``[abs(start):abs(end)]``
        taken from its two coordinates on the chosen axis is incremented by one.
        The array length is the largest coordinate on that axis, or the
        magnitude of the smallest coordinate when that minimum is negative.
    """
    assert axis in [0, 1]

    # Columns `axis` and `axis + 2` are the two coordinates on the chosen axis.
    spans = boxes[:, axis::2]
    lowest = np.min(spans)
    length = abs(lowest) if lowest < 0 else np.max(spans)

    histogram = np.zeros(length, dtype=int)
    for first, second in spans:
        # Coordinates are used by magnitude so negative inputs index forward.
        histogram[abs(first) : abs(second)] += 1

    return histogram
|
86
|
+
|
87
|
+
|
88
|
+
def split_projection_profile(arr_values: np.ndarray, min_value: float, min_gap: float):
    """
    Split the projection profile into segments based on specified thresholds.

    Args:
        arr_values: 1D array representing the projection profile.
        min_value: Entries strictly greater than this value are significant.
        min_gap: Two consecutive significant indices whose difference is
            strictly greater than this value start a new segment.

    Returns:
        ``None`` when no entry exceeds ``min_value``; otherwise a tuple
        ``(segment_starts, segment_ends)`` of equally long index arrays that
        describe half-open intervals ``[start, end)``.
    """
    # Identify indices where the projection exceeds the minimum value
    significant_indices = np.where(arr_values > min_value)[0]
    if not len(significant_indices):
        return None

    # Positions in `significant_indices` after which a gap occurs
    index_diffs = significant_indices[1:] - significant_indices[:-1]
    gap_indices = np.where(index_diffs > min_gap)[0]

    # A segment starts at the first significant index and right after each gap
    segment_starts = np.insert(
        significant_indices[gap_indices + 1],
        0,
        significant_indices[0],
    )
    # Every end is made exclusive (last significant index + 1). Previously only
    # the final segment got the `+ 1`, so `start <= v < end` selections missed
    # values on the last index of an interior segment and were empty for a
    # one-index interior segment.
    segment_ends = (
        np.append(
            significant_indices[gap_indices],
            significant_indices[-1],
        )
        + 1
    )

    return segment_starts, segment_ends
|
121
|
+
|
122
|
+
|
123
|
+
def recursive_yx_cut(
    boxes: np.ndarray, indices: List[int], res: List[int], min_gap: int = 1
):
    """
    Order boxes by recursively cutting along the Y-axis first, then the X-axis.

    Args:
        boxes: A (N, 4) array representing bounding boxes.
        indices: Original positions of the boxes; same length as ``boxes``.
        res: Output list; indices are appended to it in reading order.
        min_gap (int): Gap threshold used for the X-axis split at this level.
            The Y-axis split always uses a gap of 1, and nested calls are made
            without ``min_gap`` so they fall back to the default. Defaults to 1.

    Returns:
        None: This function modifies the `res` list in place.
    """
    assert len(boxes) == len(
        indices
    ), "The length of boxes and indices must be the same."

    # Arrange boxes and their ids by y_min before projecting onto the Y-axis
    by_top = boxes[:, 1].argsort()
    rows_boxes = boxes[by_top]
    rows_ids = np.array(indices)[by_top]

    y_segments = split_projection_profile(
        projection_by_bboxes(boxes=rows_boxes, axis=1), 0, 1
    )
    if not y_segments:
        return

    tops = rows_boxes[:, 1]
    for y_lo, y_hi in zip(*y_segments):
        # Boxes whose y_min falls into the current Y segment
        in_band = (y_lo <= tops) & (tops < y_hi)
        band_boxes = rows_boxes[in_band]
        band_ids = rows_ids[in_band]

        # Re-arrange the band by x_min before projecting onto the X-axis
        by_left = band_boxes[:, 0].argsort()
        band_boxes = band_boxes[by_left]
        band_ids = band_ids[by_left]

        x_segments = split_projection_profile(
            projection_by_bboxes(boxes=band_boxes, axis=0), 0, min_gap
        )
        if not x_segments:
            continue

        # A single X segment means the band cannot be cut any further
        if len(x_segments[0]) == 1:
            res.extend(band_ids)
            continue

        lefts = band_boxes[:, 0]
        # Negative x coordinates: visit the X segments in reverse order
        if np.min(lefts) < 0:
            x_segments = np.flip(x_segments, axis=1)

        lefts_magnitude = abs(lefts)
        for x_lo, x_hi in zip(*x_segments):
            in_column = (x_lo <= lefts_magnitude) & (lefts_magnitude < x_hi)
            recursive_yx_cut(
                band_boxes[in_column],
                band_ids[in_column],
                res,
            )
|
192
|
+
|
193
|
+
|
194
|
+
def recursive_xy_cut(
    boxes: np.ndarray, indices: List[int], res: List[int], min_gap: int = 1
):
    """
    Order boxes by recursively cutting along the X-axis first, then the Y-axis.

    Args:
        boxes: A (N, 4) array representing bounding boxes with [x_min, y_min, x_max, y_max].
        indices: Original positions of the boxes; same length as ``boxes``.
        res: Output list; indices are appended to it in reading order.
        min_gap (int): Gap threshold used for the Y-axis split at this level.
            The X-axis split always uses a gap of 1, and nested calls are made
            without ``min_gap`` so they fall back to the default. Defaults to 1.

    Returns:
        None: This function modifies the `res` list in place.
    """
    assert len(boxes) == len(
        indices
    ), "The length of boxes and indices must be the same."

    # Arrange boxes and their ids by x_min before projecting onto the X-axis
    by_left = boxes[:, 0].argsort()
    cols_boxes = boxes[by_left]
    cols_ids = np.array(indices)[by_left]

    x_segments = split_projection_profile(
        projection_by_bboxes(boxes=cols_boxes, axis=0), 0, 1
    )
    if not x_segments:
        return

    lefts = cols_boxes[:, 0]
    # Negative x coordinates: visit the X segments in reverse order
    if np.min(lefts) < 0:
        x_segments = np.flip(x_segments, axis=1)

    lefts_magnitude = abs(lefts)
    for x_lo, x_hi in zip(*x_segments):
        # Boxes whose |x_min| falls into the current X segment
        in_column = (x_lo <= lefts_magnitude) & (lefts_magnitude < x_hi)
        column_boxes = cols_boxes[in_column]
        column_ids = cols_ids[in_column]

        # Re-arrange the column by y_min before projecting onto the Y-axis
        by_top = column_boxes[:, 1].argsort()
        column_boxes = column_boxes[by_top]
        column_ids = column_ids[by_top]

        y_segments = split_projection_profile(
            projection_by_bboxes(boxes=column_boxes, axis=1), 0, min_gap
        )
        if not y_segments:
            continue

        # A single Y segment means the column cannot be cut any further
        if len(y_segments[0]) == 1:
            res.extend(column_ids)
            continue

        tops = column_boxes[:, 1]
        for y_lo, y_hi in zip(*y_segments):
            in_band = (y_lo <= tops) & (tops < y_hi)
            recursive_xy_cut(
                column_boxes[in_band],
                column_ids[in_band],
                res,
            )
|
264
|
+
|
265
|
+
|
266
|
+
def reference_insert(
    block: LayoutParsingBlock,
    sorted_blocks: List[LayoutParsingBlock],
    **kwargs,
):
    """
    Insert a reference block right after the best-matching block above it.

    Among the sorted blocks whose bottom edge (``bbox[3]``) is not below the
    top edge (``bbox[1]``) of ``block``, the one with the largest
    ``bbox[2] * 10 + bbox[3]`` is chosen (first one wins on ties). When no
    sorted block qualifies, index 0 is used as the anchor.

    Args:
        block: The block to insert into the sorted blocks.
        sorted_blocks: The sorted blocks; modified in place.
        **kwargs: Accepted but not used by this function.

    Returns:
        sorted_blocks: The same list, with ``block`` inserted after the anchor.
    """
    anchor_idx = 0
    best_score = float("-inf")
    for idx, candidate in enumerate(sorted_blocks):
        if candidate.bbox[3] <= block.bbox[1]:
            # Larger right/bottom coordinates rank higher; x2 dominates.
            score = candidate.bbox[2] * 10 + candidate.bbox[3]
            if score > best_score:
                best_score, anchor_idx = score, idx

    sorted_blocks.insert(anchor_idx + 1, block)
    return sorted_blocks
|
294
|
+
|
295
|
+
|
296
|
+
def manhattan_insert(
    block: LayoutParsingBlock,
    sorted_blocks: List[LayoutParsingBlock],
    **kwargs,
):
    """
    Insert a block right after the sorted block with the smallest Manhattan distance.

    Distances are computed with ``_manhattan_distance`` on the two ``bbox``
    values; the first block with the strictly smallest distance is the anchor.
    With an empty ``sorted_blocks`` the block simply becomes the only element.

    Args:
        block: The block to insert into the sorted blocks.
        sorted_blocks: The sorted blocks; modified in place.
        **kwargs: Accepted but not used by this function.

    Returns:
        sorted_blocks: The same list, with ``block`` inserted after the anchor.
    """
    distances = [
        _manhattan_distance(block.bbox, candidate.bbox) for candidate in sorted_blocks
    ]

    anchor_idx = 0
    smallest = float("inf")
    for idx, current in enumerate(distances):
        if current < smallest:
            smallest, anchor_idx = current, idx

    sorted_blocks.insert(anchor_idx + 1, block)
    return sorted_blocks
|
323
|
+
|
324
|
+
|
325
|
+
def weighted_distance_insert(
    block: LayoutParsingBlock,
    sorted_blocks: List[LayoutParsingBlock],
    region: LayoutParsingRegion,
):
    """
    Insert a block next to the sorted block with the smallest weighted distance.

    For every sorted block a score is built from three terms, each scaled by
    the matching entry of ``XYCUT_SETTINGS["distance_weight_map"]``: the
    nearest edge distance to ``block``, an "up edge" coordinate and a
    "left edge" coordinate of the sorted block. ``block`` is inserted before
    the best-scoring block, or after it when ``block`` starts lower
    (``y1 > y1'``) or at the same height but further right.

    Args:
        block: The block to insert; its ``bbox``, ``label``, ``order_label``
            and ``direction`` are read.
        sorted_blocks: The sorted blocks; modified in place.
        region: The enclosing region; its ``direction`` and
            ``text_line_width`` are read.

    Returns:
        sorted_blocks: The same list, with ``block`` inserted.
    """
    tolerance_len = XYCUT_SETTINGS["edge_distance_compare_tolerance_len"]
    x1, y1, x2, _ = block.bbox

    # Nothing to compare against: the block becomes the first element.
    if not sorted_blocks:
        sorted_blocks.insert(0, block)
        return sorted_blocks

    label = block.label
    is_abstract = label == "abstract"
    is_horizontal = region.direction == "horizontal"

    # The tolerance depends only on `block` and `region`, so it is derived
    # once. It used to be updated inside the candidate loop, which doubled it
    # again for every sorted block when the label is "abstract".
    if label in BLOCK_LABEL_MAP["doc_title_labels"]:
        tolerance_len = max(tolerance_len, max(1, region.text_line_width))
    if is_abstract:
        tolerance_len *= 2

    # Labels for which the up/left terms are negated when `block` lies past
    # the candidate in reading direction.
    negate_when_below = (
        label not in BLOCK_LABEL_MAP["unordered_labels"]
        or label in BLOCK_LABEL_MAP["doc_title_labels"]
        or label in BLOCK_LABEL_MAP["paragraph_title_labels"]
        or label in BLOCK_LABEL_MAP["vision_labels"]
    )

    weight = _get_weights(block.order_label, block.direction)
    weight_map = XYCUT_SETTINGS["distance_weight_map"]
    edge_weight = weight_map.get("edge_weight", 10**4)
    up_edge_weight = weight_map.get("up_edge_weight", 1)
    left_edge_weight = weight_map.get("left_edge_weight", 0.0001)

    min_weighted_distance = float("inf")
    min_up_edge_distance = float("inf")
    nearest_sorted_block_index = 0
    for sorted_block_idx, sorted_block in enumerate(sorted_blocks):
        x1_prime, y1_prime, x2_prime, y2_prime = sorted_block.bbox

        # Calculate edge distance
        edge_distance = get_nearest_edge_distance(block.bbox, sorted_block.bbox, weight)
        if is_abstract:
            edge_distance = max(0.1, edge_distance) * 10

        # Calculate up / left edge terms for the region's direction
        if is_horizontal:
            up_edge_distance = y1_prime
            left_edge_distance = x1_prime
            is_below_sorted_block = y2_prime < y1
        else:
            up_edge_distance = -x2_prime
            left_edge_distance = y1_prime
            is_below_sorted_block = x1_prime > x2

        if negate_when_below and is_below_sorted_block:
            up_edge_distance = -up_edge_distance
            left_edge_distance = -left_edge_distance

        # Treat up-edge values within the tolerance of the current minimum as equal
        if abs(min_up_edge_distance - up_edge_distance) <= tolerance_len:
            up_edge_distance = min_up_edge_distance

        # Calculate weighted distance
        weighted_distance = (
            edge_distance * edge_weight
            + up_edge_distance * up_edge_weight
            + left_edge_distance * left_edge_weight
        )

        min_up_edge_distance = min(up_edge_distance, min_up_edge_distance)

        if weighted_distance < min_weighted_distance:
            nearest_sorted_block_index = sorted_block_idx
            min_weighted_distance = weighted_distance
            if y1 > y1_prime or (y1 == y1_prime and x1 > x1_prime):
                nearest_sorted_block_index = sorted_block_idx + 1

    sorted_blocks.insert(nearest_sorted_block_index, block)
    return sorted_blocks
|
406
|
+
|
407
|
+
|
408
|
+
def insert_child_blocks(
    block: LayoutParsingBlock,
    block_idx: int,
    sorted_blocks: List[LayoutParsingBlock],
) -> List[LayoutParsingBlock]:
    """
    Expand a parent block inside the sorted list into itself plus its children.

    Args:
        block: Parent block. Nothing is changed when its ``child_blocks`` is falsy.
        block_idx: Position of the parent block inside ``sorted_blocks``.
        sorted_blocks: Ordered block list; it is modified in place.

    Returns:
        sorted_blocks: The same list object that was passed in.
    """
    if not block.child_blocks:
        return sorted_blocks

    family = block.get_child_blocks()
    family.append(block)
    # The direction of the first collected member decides how the family is ordered.
    family = sort_child_blocks(family, family[0].direction)

    # The first member takes over the parent's slot, the others follow right behind it.
    sorted_blocks[block_idx] = family[0]
    for offset, member in enumerate(family[1:], start=1):
        sorted_blocks.insert(block_idx + offset, member)
    return sorted_blocks
|
433
|
+
|
434
|
+
|
435
|
+
def sort_child_blocks(blocks, direction="horizontal") -> List[LayoutParsingBlock]:
    """
    Sort child blocks in place by the top-left corner of their bounding boxes.

    Args:
        blocks: List of objects exposing ``bbox`` with x_min at index 0 and
            y_min at index 1.
        direction: 'horizontal' orders top-to-bottom, then left-to-right. Any
            other value orders right-to-left (descending x_min), then top-to-bottom.

    Returns:
        sorted_blocks: The same list object, now sorted.
    """

    def _top_down_key(item):
        left, top = item.bbox[0], item.bbox[1]
        # Last component: squared distance to the origin (0, 0).
        return top, left, top**2 + left**2

    def _right_to_left_key(item):
        left, top = item.bbox[0], item.bbox[1]
        # Last component mirrors the horizontal case for a right-hand origin.
        return -left, top, top**2 - left**2

    key_func = _top_down_key if direction == "horizontal" else _right_to_left_key
    blocks.sort(key=key_func)
    return blocks
|
464
|
+
|
465
|
+
|
466
|
+
def _get_weights(label, direction="horizontal"):
|
467
|
+
"""Define weights based on the label and direction."""
|
468
|
+
if label == "doc_title":
|
469
|
+
return (
|
470
|
+
[1, 0.1, 0.1, 1] if direction == "horizontal" else [0.2, 0.1, 1, 1]
|
471
|
+
) # left-down , right-left
|
472
|
+
elif label in [
|
473
|
+
"paragraph_title",
|
474
|
+
"table_title",
|
475
|
+
"abstract",
|
476
|
+
"image",
|
477
|
+
"seal",
|
478
|
+
"chart",
|
479
|
+
"figure",
|
480
|
+
]:
|
481
|
+
return [1, 1, 0.1, 1] # down
|
482
|
+
else:
|
483
|
+
return [1, 1, 1, 0.1] # up
|
484
|
+
|
485
|
+
|
486
|
+
def _manhattan_distance(
|
487
|
+
point1: Tuple[float, float],
|
488
|
+
point2: Tuple[float, float],
|
489
|
+
weight_x: float = 1.0,
|
490
|
+
weight_y: float = 1.0,
|
491
|
+
) -> float:
|
492
|
+
"""
|
493
|
+
Calculate the weighted Manhattan distance between two points.
|
494
|
+
|
495
|
+
Args:
|
496
|
+
point1 (Tuple[float, float]): The first point as (x, y).
|
497
|
+
point2 (Tuple[float, float]): The second point as (x, y).
|
498
|
+
weight_x (float): The weight for the x-axis distance. Default is 1.0.
|
499
|
+
weight_y (float): The weight for the y-axis distance. Default is 1.0.
|
500
|
+
|
501
|
+
Returns:
|
502
|
+
float: The weighted Manhattan distance between the two points.
|
503
|
+
"""
|
504
|
+
return weight_x * abs(point1[0] - point2[0]) + weight_y * abs(point1[1] - point2[1])
|
505
|
+
|
506
|
+
|
507
|
+
def sort_normal_blocks(blocks, text_line_height, text_line_width, region_direction):
    """
    Sort blocks in place into a coarse, grid-quantised reading order.

    This function used to be defined twice back to back; only the second
    definition was ever reachable, so that is the one kept here.

    Args:
        blocks: List of objects exposing ``bbox`` as [x_min, y_min, x_max, y_max].
        text_line_height: Cell height used to bucket y_min (must be non-zero).
        text_line_width: Cell width used to bucket x_min (must be non-zero).
        region_direction: 'horizontal' sorts by row bucket, then column bucket.
            Any other value sorts by column bucket from right to left, then
            row bucket.

    Returns:
        The same list object, now sorted.
    """
    if region_direction == "horizontal":
        blocks.sort(
            key=lambda x: (
                x.bbox[1] // text_line_height,
                x.bbox[0] // text_line_width,
                # Tie-break inside one cell: squared distance to (0, 0).
                x.bbox[1] ** 2 + x.bbox[0] ** 2,
            ),
        )
    else:
        blocks.sort(
            key=lambda x: (
                # Unary minus binds tighter than //, so this floors (-x_min) / width.
                -x.bbox[0] // text_line_width,
                x.bbox[1] // text_line_height,
                # Tie-break inside one cell: larger x_max / y_min sorts first.
                -(x.bbox[2] ** 2 + x.bbox[1] ** 2),
            ),
        )
    return blocks
|
545
|
+
|
546
|
+
|
547
|
+
def get_cut_blocks(blocks, cut_direction, cut_coordinates, mask_labels=None):
    """
    Partition blocks into groups separated by the given cut coordinates.

    Blocks are ordered by their far edge along the cut axis (x_max for
    "horizontal", y_max otherwise). Each group then holds the blocks whose far
    edge does not exceed the next cut coordinate. An implicit final cut at
    +inf collects whatever remains.

    Args:
        blocks (list): Blocks to be cut. NOTE: sorted in place by far edge.
        cut_direction (str): Cut direction, either "horizontal" or "vertical".
        cut_coordinates (list): Cut coordinates. The list is not modified.
        mask_labels (list, optional): ``order_label`` values to leave out of
            the result. Defaults to no masking.

    Returns:
        list: Non-empty groups (lists of blocks) in ascending cut order.
    """
    masked_labels = () if mask_labels is None else mask_labels

    # bbox is [x_min, y_min, x_max, y_max]; pick the far edge on the cut axis.
    far_edge_index = 2 if cut_direction == "horizontal" else 3
    blocks.sort(key=lambda x: x.bbox[far_edge_index])

    # Work on a de-duplicated, sorted copy so the caller's list stays untouched.
    boundaries = sorted({*cut_coordinates, float("inf")})

    cut_groups = []
    next_idx = 0
    for boundary in boundaries:
        group = []
        while next_idx < len(blocks):
            block = blocks[next_idx]
            if block.bbox[far_edge_index] > boundary:
                break
            # Masked blocks are consumed but never reported.
            if block.order_label not in masked_labels:
                group.append(block)
            next_idx += 1
        if group:
            cut_groups.append(group)

    return cut_groups
|
586
|
+
|
587
|
+
|
588
|
+
def add_split_block(
    blocks: List[LayoutParsingBlock], region_bbox: List[int]
) -> List[LayoutParsingBlock]:
    """
    Append "split" blocks that fill large vertical gaps between block rows.

    The y-extents of all blocks are merged into disjoint intervals. For every
    pair of consecutive intervals more than 40 units apart, a block labelled
    "split" is appended. It spans the region's full width and the gap itself,
    shrunk by 5 units at the top and bottom.

    Args:
        blocks: Blocks of the region; new split blocks are appended in place.
        region_bbox: Region box as [x_min, y_min, x_max, y_max]; only the x
            extent is used.

    Returns:
        The same ``blocks`` list (previously the function fell off the end and
        returned None despite its annotation).
    """
    if not blocks:
        # Nothing to project; the projection helper cannot handle an empty array.
        return blocks

    block_bboxes = np.array([block.bbox for block in blocks])
    discontinuous = calculate_discontinuous_projection(
        block_bboxes, direction="vertical"
    )
    for upper, lower in zip(discontinuous, discontinuous[1:]):
        gap_len = lower[0] - upper[1]
        if gap_len > 40:
            x1, _, x2, __ = region_bbox
            bbox = [x1, upper[1] + 5, x2, lower[0] - 5]
            blocks.append(LayoutParsingBlock(label="split", bbox=bbox))
    return blocks
|
606
|
+
|
607
|
+
|
608
|
+
def get_nearest_blocks(
    block: LayoutParsingBlock,
    ref_blocks: List[LayoutParsingBlock],
    overlap_threshold,
    direction="horizontal",
) -> List:
    """
    Collect the reference blocks lying before and after ``block`` along one axis.

    A reference block is kept only when its projection overlap ratio with
    ``block`` (``mode="small"``) exceeds ``overlap_threshold``. Blocks sharing
    ``block.index`` are ignored.

    Args:
        block (LayoutParsingBlock): The current block.
        ref_blocks (List[LayoutParsingBlock]): Candidate neighbours.
        overlap_threshold (float): Minimum projection overlap ratio (exclusive).
        direction (str): 'horizontal' compares ``bbox[1]`` (y_min); any other
            value compares ``bbox[0]`` (x_min).

    Returns:
        Tuple of two lists ``(prev_blocks, post_blocks)``, both ordered from the
        nearest to the farthest start coordinate relative to ``block``.
    """
    axis = 1 if direction == "horizontal" else 0
    before: List[LayoutParsingBlock] = []
    after: List[LayoutParsingBlock] = []

    for candidate in ref_blocks:
        if candidate.index == block.index:
            continue
        ratio = calculate_projection_overlap_ratio(
            block.bbox, candidate.bbox, direction, mode="small"
        )
        if ratio > overlap_threshold:
            # Ties on the start coordinate count as "before".
            target = before if candidate.bbox[axis] <= block.bbox[axis] else after
            target.append(candidate)

    # Nearest first: descending start for predecessors, ascending for successors.
    before.sort(key=lambda x: x.bbox[axis], reverse=True)
    after.sort(key=lambda x: x.bbox[axis])

    return before, after
|
646
|
+
|
647
|
+
|
648
|
+
def get_adjacent_blocks_by_direction(
    blocks: List[LayoutParsingBlock],
    block_idx: int,
    ref_block_idxes: List[int],
    iou_threshold,
) -> List:
    """
    Find the closest preceding and following reference blocks of one block.

    Args:
        blocks (List[LayoutParsingBlock]): All blocks, indexable by block index.
        block_idx (int): Index of the current block in ``blocks``.
        ref_block_idxes (List[int]): Indices of the candidate reference blocks.
        iou_threshold (float): Minimum projection overlap ratio (inclusive)
            between the current block and a candidate.

    Returns:
        Tuple ``(prev_block_index, post_block_index)``; each entry is an index
        from ``ref_block_idxes`` or None when no suitable neighbour exists.
    """
    block = blocks[block_idx]
    # Blocks that already became somebody's child are never candidates.
    skipped_order_labels = (
        "vision_footnote",
        "sub_paragraph_title",
        "doc_title_text",
        "vision_title",
    )

    best_prev_distance = float("inf")
    best_post_distance = float("inf")
    prev_block_index = None
    post_block_index = None

    # find the nearest text block with same direction to the current block
    for candidate_idx in ref_block_idxes:
        candidate = blocks[candidate_idx]
        candidate_direction = candidate.direction
        if candidate.order_label in skipped_order_labels:
            continue
        overlap = calculate_projection_overlap_ratio(
            block.bbox,
            candidate.bbox,
            candidate_direction,
        )

        # Slack that lets slightly overlapping neighbours still count.
        slack = block.short_side_length / 10

        # Largest gap at which a neighbour is still accepted.
        if block.order_label != "vision":
            max_gap = block.short_side_length * 2
        elif candidate.num_of_lines == 1:
            max_gap = candidate.short_side_length * 2
        else:
            max_gap = block.short_side_length / 10

        if not (overlap >= iou_threshold):
            continue

        gap_before = (
            block.secondary_direction_start_coordinate
            - candidate.secondary_direction_end_coordinate
        )
        gap_after = (
            candidate.secondary_direction_start_coordinate
            - block.secondary_direction_end_coordinate
        )
        # Distances are bucketed in steps of 5; the start coordinate only
        # breaks ties between candidates in the same bucket.
        tie_breaker = candidate.start_coordinate / 5000
        prev_distance = (gap_before + slack) // 5 + tie_breaker
        next_distance = (gap_after + slack) // 5 + tie_breaker

        ends_before_block = (
            candidate.secondary_direction_end_coordinate
            <= block.secondary_direction_start_coordinate + slack
        )
        if ends_before_block and prev_distance < best_prev_distance:
            # The best distance is tracked even when the gap is too wide to link.
            best_prev_distance = prev_distance
            if gap_before < max_gap:
                prev_block_index = candidate_idx
        elif (
            candidate.secondary_direction_start_coordinate
            > block.secondary_direction_end_coordinate - slack
            and next_distance < best_post_distance
        ):
            best_post_distance = next_distance
            if gap_after < max_gap:
                post_block_index = candidate_idx

    diff_dist = abs(best_prev_distance - best_post_distance)

    # if the difference in distance is too large, only consider the nearest one
    if diff_dist * 5 > block.short_side_length:
        if best_prev_distance < best_post_distance:
            post_block_index = None
        else:
            prev_block_index = None

    return prev_block_index, post_block_index
|
747
|
+
|
748
|
+
|
749
|
+
def update_doc_title_child_blocks(
    block: LayoutParsingBlock,
    region: LayoutParsingRegion,
) -> None:
    """
    Attach neighbouring text blocks to a document title block as children.

    Only the nearest previous and the nearest following normal-text block
    (along the title's direction) are examined. A neighbour is adopted when all
    of the following hold:
        1. It has the same direction as the title block.
        2. Its label is one of the text labels.
        3. Its short side is less than 80% of the title's short side.
        4. Its long side is shorter than the title's long side, or longer than
           150% of it.
        5. It has fewer than 3 lines.
        6. The nearest edge distance is below twice the neighbour's text line height.

    An adopted neighbour gets ``order_label = "doc_title_text"``, is appended to
    the title's children and is removed from ``region.normal_text_block_idxes``.

    Args:
        block (LayoutParsingBlock): document title block.
        region (LayoutParsingRegion): region providing ``block_map`` and
            ``normal_text_block_idxes``.

    Returns:
        None
    """
    candidates = [region.block_map[idx] for idx in region.normal_text_block_idxes]
    overlap_threshold = XYCUT_SETTINGS["child_block_overlap_ratio_threshold"]
    prev_blocks, post_blocks = get_nearest_blocks(
        block, candidates, overlap_threshold, block.direction
    )
    nearest_prev = prev_blocks[0] if prev_blocks else None
    nearest_post = post_blocks[0] if post_blocks else None

    for neighbour in (nearest_prev, nearest_post):
        if neighbour is None:
            continue

        same_direction = neighbour.direction == block.direction
        is_thinner = neighbour.short_side_length < block.short_side_length * 0.8
        length_fits = (
            neighbour.long_side_length < block.long_side_length
            or neighbour.long_side_length > 1.5 * block.long_side_length
        )
        edge_gap = get_nearest_edge_distance(block.bbox, neighbour.bbox)

        qualifies = (
            same_direction
            and neighbour.label in BLOCK_LABEL_MAP["text_labels"]
            and is_thinner
            and length_fits
            and neighbour.num_of_lines < 3
            and edge_gap < neighbour.text_line_height * 2
        )
        if not qualifies:
            continue

        neighbour.order_label = "doc_title_text"
        block.append_child_block(neighbour)
        region.normal_text_block_idxes.remove(neighbour.index)
|
815
|
+
|
816
|
+
|
817
|
+
def update_paragraph_title_child_blocks(
    block: LayoutParsingBlock,
    region: LayoutParsingRegion,
) -> None:
    """
    Attach adjacent paragraph-title blocks to a paragraph title as sub-titles.

    Neighbours before and after the block (along its direction) are walked from
    nearest to farthest. Each walk stops at the first neighbour whose label is
    not a paragraph-title label. A neighbour is adopted when:
        1. It has the same direction as the parent block.
        2. The nearest edge distance is at most 1.5 times the smaller of the
           two text line heights.

    An adopted neighbour gets ``order_label = "sub_paragraph_title"``, is
    appended to the parent's children and is removed from
    ``region.paragraph_title_block_idxes``. Blocks that are themselves already
    a "sub_paragraph_title" are left untouched.

    Args:
        block (LayoutParsingBlock): paragraph title block.
        region (LayoutParsingRegion): region providing ``block_map``,
            ``paragraph_title_block_idxes`` and ``normal_text_block_idxes``.

    Returns:
        None
    """
    if block.order_label == "sub_paragraph_title":
        return

    candidate_idxes = (
        region.paragraph_title_block_idxes + region.normal_text_block_idxes
    )
    candidates = [region.block_map[idx] for idx in candidate_idxes]
    overlap_threshold = XYCUT_SETTINGS["child_block_overlap_ratio_threshold"]
    prev_blocks, post_blocks = get_nearest_blocks(
        block, candidates, overlap_threshold, block.direction
    )

    for neighbours in (prev_blocks, post_blocks):
        for neighbour in neighbours:
            # A non-title neighbour ends the run of stacked titles on this side.
            if neighbour.label not in BLOCK_LABEL_MAP["paragraph_title_labels"]:
                break
            line_height = min(block.text_line_height, neighbour.text_line_height)
            edge_gap = get_nearest_edge_distance(block.bbox, neighbour.bbox)
            same_direction = neighbour.direction == block.direction
            if same_direction and edge_gap <= line_height * 1.5:
                neighbour.order_label = "sub_paragraph_title"
                block.append_child_block(neighbour)
                region.paragraph_title_block_idxes.remove(neighbour.index)
|
868
|
+
|
869
|
+
|
870
|
+
def update_vision_child_blocks(
    block: LayoutParsingBlock,
    region: LayoutParsingRegion,
) -> None:
    """
    Attach titles and footnotes to a vision block (image, chart, ...) as children.

    Candidates are the region's normal-text and vision-title blocks. They are
    examined first along the block's direction and, unless a vision title was
    found, along its secondary direction too. On each side the neighbours are
    walked from nearest to farthest.

    - A vision-title neighbour is adopted as "vision_title" when the nearest
      edge distance is at most twice its own text line height.
    - A text neighbour may be adopted as "vision_footnote", at most once per
      vision block. It must share the block's direction and be small relative
      to the block. It must also be roughly centred on the block, or be a
      single line whose left or right edge offset from the block's is below 10.
      The exact grouping of these checks differs between the previous and the
      following side; see the code.

    Adopted blocks are removed from ``region.vision_title_block_idxes`` or
    ``region.normal_text_block_idxes`` respectively.

    Args:
        block (LayoutParsingBlock): vision block.
        region (LayoutParsingRegion): region providing ``block_map`` and the
            index lists mentioned above.

    Returns:
        None
    """
    text_labels = BLOCK_LABEL_MAP["text_labels"]
    title_labels = BLOCK_LABEL_MAP["vision_title_labels"]
    candidate_labels = text_labels + title_labels

    candidates = [
        region.block_map[idx]
        for idx in region.normal_text_block_idxes + region.vision_title_block_idxes
    ]
    overlap_threshold = XYCUT_SETTINGS["child_block_overlap_ratio_threshold"]

    def _adopt_title(child):
        child.order_label = "vision_title"
        block.append_child_block(child)
        region.vision_title_block_idxes.remove(child.index)

    def _adopt_footnote(child):
        child.order_label = "vision_footnote"
        block.append_child_block(child)
        region.normal_text_block_idxes.remove(child.index)

    def _edge_aligned_single_line(child):
        # NOTE(review): the offsets are signed, not absolute - confirm intent.
        return (block.bbox[0] - child.bbox[0] < 10 and child.num_of_lines == 1) or (
            block.bbox[2] - child.bbox[2] < 10 and child.num_of_lines == 1
        )

    has_vision_footnote = False
    has_vision_title = False
    for direction in [block.direction, block.secondary_direction]:
        prev_blocks, post_blocks = get_nearest_blocks(
            block, candidates, overlap_threshold, direction
        )

        for neighbour in prev_blocks:
            if neighbour.label not in candidate_labels:
                break
            edge_gap = get_nearest_edge_distance(block.bbox, neighbour.bbox)
            block_center = block.get_centroid()
            neighbour_center = neighbour.get_centroid()
            if (
                neighbour.label in title_labels
                and edge_gap <= neighbour.text_line_height * 2
            ):
                has_vision_title = True
                _adopt_title(neighbour)
            if neighbour.label not in text_labels:
                continue
            if (
                not has_vision_footnote
                and neighbour.direction == block.direction
                and neighbour.long_side_length < block.long_side_length
            ):
                centred_and_small = (
                    edge_gap <= block.text_line_height * 2
                    and neighbour.short_side_length < block.short_side_length
                    and neighbour.long_side_length < 0.5 * block.long_side_length
                    and abs(block_center[0] - neighbour_center[0]) < 10
                )
                if centred_and_small or _edge_aligned_single_line(neighbour):
                    has_vision_footnote = True
                    _adopt_footnote(neighbour)
            # NOTE(review): the walk is assumed to stop at the first text
            # block whether or not it was adopted; the nesting of this break
            # was reconstructed - confirm against the upstream file.
            break

        for neighbour in post_blocks:
            if has_vision_footnote and neighbour.label in text_labels:
                break
            edge_gap = get_nearest_edge_distance(block.bbox, neighbour.bbox)
            block_center = block.get_centroid()
            neighbour_center = neighbour.get_centroid()
            if (
                neighbour.label in title_labels
                and edge_gap <= neighbour.text_line_height * 2
            ):
                has_vision_title = True
                _adopt_title(neighbour)
            if neighbour.label not in text_labels:
                continue
            if (
                not has_vision_footnote
                and edge_gap <= block.text_line_height * 2
                and neighbour.short_side_length < block.short_side_length
                and neighbour.long_side_length < 0.5 * block.long_side_length
                and neighbour.direction == block.direction
                and (
                    abs(block_center[0] - neighbour_center[0]) < 10
                    or _edge_aligned_single_line(neighbour)
                )
            ):
                has_vision_footnote = True
                _adopt_footnote(neighbour)
            # NOTE(review): same reconstructed nesting as in the loop above.
            break

        # A title found along the primary direction makes the second pass unnecessary.
        if has_vision_title:
            break
|
1005
|
+
|
1006
|
+
|
1007
|
+
def calculate_discontinuous_projection(
    boxes, direction="horizontal", return_num=False
) -> List:
    """
    Project boxes onto one axis and merge overlapping or touching extents.

    Args:
        boxes (ndarray): Array of bounding boxes represented by [[x_min, y_min, x_max, y_max]].
        direction (str): 'horizontal' projects onto the x axis (columns 0 and 2),
            'vertical' onto the y axis (columns 1 and 3).
        return_num (bool): When True, also return how many boxes were merged
            into each interval.

    Returns:
        list: Merged ``(start, end)`` intervals in ascending order. When
        ``return_num`` is True, a tuple ``(intervals, counts)`` is returned
        instead. Empty input yields an empty result rather than an IndexError.

    Raises:
        ValueError: If ``direction`` is neither 'horizontal' nor 'vertical'.
    """
    boxes = np.array(boxes)
    if direction == "horizontal":
        columns = [0, 2]
    elif direction == "vertical":
        columns = [1, 3]
    else:
        raise ValueError("direction must be 'horizontal' or 'vertical'")

    if boxes.size == 0:
        # No boxes means no intervals; indexing below would fail on an empty array.
        return ([], []) if return_num else []

    intervals = boxes[:, columns]
    intervals = intervals[np.argsort(intervals[:, 0])]

    merged_intervals = []
    num_list = []
    num = 1
    current_start, current_end = intervals[0]

    for start, end in intervals[1:]:
        if start <= current_end:
            # Overlapping or touching: extend the running interval.
            num += 1
            current_end = max(current_end, end)
        else:
            num_list.append(num)
            merged_intervals.append((current_start, current_end))
            num = 1
            current_start, current_end = start, end

    # Flush the interval that was still open when the loop ended.
    num_list.append(num)
    merged_intervals.append((current_start, current_end))

    if return_num:
        return merged_intervals, num_list
    return merged_intervals
|
1050
|
+
|
1051
|
+
|
1052
|
+
def is_projection_consistent(blocks, intervals, direction="horizontal"):
    """
    Check that text blocks sharing a projection interval do not end raggedly.

    For every interval, the text-labelled blocks whose projection overlaps it
    are collected. The check fails when their start coordinates line up (spread
    below 5% of the interval length) while their end coordinates do not
    (spread of at least 5% of the interval length).

    Args:
        blocks: Objects exposing ``bbox`` and ``label``.
        intervals: Iterable of ``(start, end)`` pairs along the projection axis.
        direction: 'horizontal' uses the x coordinates of the boxes, any other
            value the y coordinates.

    Returns:
        bool: False as soon as one interval fails the check, True otherwise.
    """
    horizontal = direction == "horizontal"
    start_index, stop_index = (0, 2) if horizontal else (1, 3)

    for interval in intervals:
        # Unit-thick probe box so the projection overlap helper can be reused.
        if horizontal:
            probe_box = [interval[0], 0, interval[1], 1]
        else:
            probe_box = [0, interval[0], 1, interval[1]]

        member_bboxes = []
        for block in blocks:
            ratio = calculate_projection_overlap_ratio(
                probe_box, block.bbox, direction=direction
            )
            if ratio > 0 and block.label in BLOCK_LABEL_MAP["text_labels"]:
                member_bboxes.append(block.bbox)

        starts = [bbox[start_index] for bbox in member_bboxes]
        if not starts:
            continue

        tolerance = abs(interval[0] - interval[1]) * 0.05
        starts_aligned = not (max(starts) - min(starts) >= tolerance)
        stops = [bbox[stop_index] for bbox in member_bboxes]
        stops_ragged = max(stops) - min(stops) >= tolerance
        if stops_ragged and starts_aligned:
            return False
    return True
|
1088
|
+
|
1089
|
+
|
1090
|
+
def shrink_overlapping_boxes(
    boxes, direction="horizontal", min_threshold=0, max_threshold=0.1
) -> List:
    """
    Pull apart consecutive blocks that slightly overlap or nearly touch.

    Blocks are visited pairwise in the given order. A pair is split at the
    midpoint of its overlap along ``direction`` when the two blocks overlap on
    the other axis and their overlap ratio along ``direction`` lies strictly
    between the thresholds. A pair is also split when the end of the first
    block is within 3 units of the start of the second. After a split the first
    block ends at ``midpoint - 1`` and the second starts at ``midpoint + 1``.

    Args:
        boxes (list): Blocks exposing a writable ``bbox`` of the form
            [x_min, y_min, x_max, y_max]. Their ``bbox`` is updated in place.
        direction (str): 'vertical' shrinks along y; any other value shrinks
            along x.
        min_threshold (float): Exclusive lower bound of the overlap ratio. Default is 0.
        max_threshold (float): Exclusive upper bound of the overlap ratio. Default is 0.1.

    Returns:
        list: The same ``boxes`` sequence. Empty input is returned unchanged.
    """
    if len(boxes) == 0:
        return boxes

    vertical = direction == "vertical"
    cross_direction = "horizontal" if vertical else "vertical"

    current_block = boxes[0]
    for block in boxes[1:]:
        x1, y1, x2, y2 = current_block.bbox
        x1_prime, y1_prime, x2_prime, y2_prime = block.bbox
        cut_iou = calculate_projection_overlap_ratio(
            current_block.bbox, block.bbox, direction=direction
        )
        match_iou = calculate_projection_overlap_ratio(
            current_block.bbox, block.bbox, direction=cross_direction
        )

        slight_overlap = match_iou > 0 and min_threshold < cut_iou < max_threshold
        # Edge-to-edge distance along the shrink axis; also covers exact contact.
        edge_gap = abs(y2 - y1_prime) if vertical else abs(x2 - x1_prime)

        if slight_overlap or edge_gap <= 3:
            if vertical:
                split_y = int((max(y1, y1_prime) + min(y2, y2_prime)) / 2)
                current_block.bbox = [x1, y1, x2, split_y - 1]
                block.bbox = [x1_prime, split_y + 1, x2_prime, y2_prime]
            else:
                split_x = int((max(x1, x1_prime) + min(x2, x2_prime)) / 2)
                current_block.bbox = [x1, y1, split_x - 1, y2]
                block.bbox = [split_x + 1, y1_prime, x2_prime, y2_prime]
        current_block = block
    return boxes
|