paddlex 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +1 -1
- paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
- paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
- paddlex/configs/pipelines/OCR.yaml +7 -6
- paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
- paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
- paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
- paddlex/configs/pipelines/doc_understanding.yaml +1 -1
- paddlex/configs/pipelines/formula_recognition.yaml +2 -2
- paddlex/configs/pipelines/layout_parsing.yaml +3 -2
- paddlex/configs/pipelines/seal_recognition.yaml +1 -0
- paddlex/configs/pipelines/table_recognition.yaml +2 -1
- paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
- paddlex/hpip_links.html +20 -20
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
- paddlex/inference/common/result/mixin.py +19 -12
- paddlex/inference/models/base/predictor/base_predictor.py +2 -8
- paddlex/inference/models/common/static_infer.py +29 -73
- paddlex/inference/models/common/tokenizer/__init__.py +2 -0
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
- paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
- paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
- paddlex/inference/models/common/tokenizer/vocab.py +7 -7
- paddlex/inference/models/common/ts/funcs.py +19 -8
- paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
- paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
- paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
- paddlex/inference/models/common/vlm/generation/utils.py +1 -1
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
- paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
- paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
- paddlex/inference/models/doc_vlm/predictor.py +79 -24
- paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/processors/common.py +189 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
- paddlex/inference/models/formula_recognition/predictor.py +8 -2
- paddlex/inference/models/formula_recognition/processors.py +90 -77
- paddlex/inference/models/formula_recognition/result.py +28 -27
- paddlex/inference/models/image_feature/processors.py +3 -4
- paddlex/inference/models/keypoint_detection/predictor.py +3 -0
- paddlex/inference/models/object_detection/predictor.py +2 -0
- paddlex/inference/models/object_detection/processors.py +28 -3
- paddlex/inference/models/object_detection/utils.py +2 -0
- paddlex/inference/models/table_structure_recognition/result.py +0 -10
- paddlex/inference/models/text_detection/predictor.py +8 -0
- paddlex/inference/models/text_detection/processors.py +44 -10
- paddlex/inference/models/text_detection/result.py +0 -10
- paddlex/inference/models/text_recognition/result.py +1 -1
- paddlex/inference/pipelines/__init__.py +9 -5
- paddlex/inference/pipelines/_parallel.py +172 -0
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
- paddlex/inference/pipelines/base.py +14 -4
- paddlex/inference/pipelines/components/faisser.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
- paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
- paddlex/inference/pipelines/formula_recognition/result.py +1 -11
- paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
- paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +832 -260
- paddlex/inference/pipelines/layout_parsing/result.py +4 -17
- paddlex/inference/pipelines/layout_parsing/result_v2.py +259 -245
- paddlex/inference/pipelines/layout_parsing/setting.py +88 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +391 -2028
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1199 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +615 -0
- paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
- paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ocr/pipeline.py +127 -70
- paddlex/inference/pipelines/ocr/result.py +21 -18
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +6 -6
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
- paddlex/inference/pipelines/table_recognition/result.py +1 -1
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
- paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
- paddlex/inference/serving/basic_serving/_app.py +46 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
- paddlex/inference/serving/infra/utils.py +20 -22
- paddlex/inference/serving/schemas/formula_recognition.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
- paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
- paddlex/inference/serving/schemas/seal_recognition.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +2 -6
- paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
- paddlex/inference/utils/hpi.py +30 -16
- paddlex/inference/utils/hpi_model_info_collection.json +666 -162
- paddlex/inference/utils/io/readers.py +12 -12
- paddlex/inference/utils/misc.py +20 -0
- paddlex/inference/utils/mkldnn_blocklist.py +59 -0
- paddlex/inference/utils/official_models.py +140 -5
- paddlex/inference/utils/pp_option.py +74 -9
- paddlex/model.py +2 -2
- paddlex/modules/__init__.py +1 -1
- paddlex/modules/anomaly_detection/evaluator.py +2 -2
- paddlex/modules/base/__init__.py +1 -1
- paddlex/modules/base/evaluator.py +5 -5
- paddlex/modules/base/trainer.py +1 -1
- paddlex/modules/doc_vlm/dataset_checker.py +2 -2
- paddlex/modules/doc_vlm/evaluator.py +2 -2
- paddlex/modules/doc_vlm/exportor.py +2 -2
- paddlex/modules/doc_vlm/model_list.py +1 -1
- paddlex/modules/doc_vlm/trainer.py +2 -2
- paddlex/modules/face_recognition/evaluator.py +2 -2
- paddlex/modules/formula_recognition/evaluator.py +5 -2
- paddlex/modules/formula_recognition/model_list.py +3 -0
- paddlex/modules/formula_recognition/trainer.py +3 -0
- paddlex/modules/general_recognition/evaluator.py +1 -1
- paddlex/modules/image_classification/evaluator.py +2 -2
- paddlex/modules/image_classification/model_list.py +1 -0
- paddlex/modules/instance_segmentation/evaluator.py +1 -1
- paddlex/modules/keypoint_detection/evaluator.py +1 -1
- paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
- paddlex/modules/multilabel_classification/evaluator.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/object_detection/evaluator.py +2 -2
- paddlex/modules/object_detection/model_list.py +2 -0
- paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
- paddlex/modules/semantic_segmentation/evaluator.py +2 -2
- paddlex/modules/table_recognition/evaluator.py +2 -2
- paddlex/modules/text_detection/evaluator.py +2 -2
- paddlex/modules/text_detection/model_list.py +2 -0
- paddlex/modules/text_recognition/evaluator.py +2 -2
- paddlex/modules/text_recognition/model_list.py +2 -0
- paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/ts_classification/evaluator.py +2 -2
- paddlex/modules/ts_forecast/evaluator.py +2 -2
- paddlex/modules/video_classification/evaluator.py +2 -2
- paddlex/modules/video_detection/evaluator.py +2 -2
- paddlex/ops/__init__.py +8 -5
- paddlex/paddlex_cli.py +19 -13
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
- paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
- paddlex/repo_apis/base/config.py +1 -1
- paddlex/repo_manager/core.py +3 -3
- paddlex/repo_manager/meta.py +6 -2
- paddlex/repo_manager/repo.py +17 -16
- paddlex/utils/custom_device_list.py +26 -2
- paddlex/utils/deps.py +3 -3
- paddlex/utils/device.py +5 -13
- paddlex/utils/env.py +4 -0
- paddlex/utils/flags.py +11 -4
- paddlex/utils/fonts/__init__.py +34 -4
- paddlex/utils/misc.py +1 -1
- paddlex/utils/subclass_register.py +2 -2
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/METADATA +349 -208
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/RECORD +240 -211
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/WHEEL +1 -1
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/entry_points.txt +1 -0
- {paddlex-3.0.0rc1.dist-info/licenses → paddlex-3.0.2.dist-info}/LICENSE +0 -0
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1199 @@
|
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from typing import List, Tuple
|
16
|
+
|
17
|
+
import numpy as np
|
18
|
+
|
19
|
+
from ..layout_objects import LayoutBlock, LayoutRegion
|
20
|
+
from ..setting import BLOCK_LABEL_MAP, XYCUT_SETTINGS
|
21
|
+
from ..utils import (
|
22
|
+
calculate_overlap_ratio,
|
23
|
+
calculate_projection_overlap_ratio,
|
24
|
+
get_seg_flag,
|
25
|
+
)
|
26
|
+
|
27
|
+
|
28
|
+
def get_nearest_edge_distance(
    bbox1: List[int],
    bbox2: List[int],
    weight: List[float] = None,
) -> float:
    """
    Calculate the weighted nearest edge distance between two bounding boxes.

    The result is the sum of a weighted X gap and a weighted Y gap. A gap on
    an axis is only counted when the projection overlap ratio reported by
    `calculate_projection_overlap_ratio` for that axis is exactly 0.

    Args:
        bbox1 (list): The bounding box coordinates [x1, y1, x2, y2] of the input object.
        bbox2 (list): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
        weight (list, optional): Four multipliers applied to the gaps.
            weight[0] is used for the X gap when bbox1 lies entirely to the
            left of bbox2 (x2 < x1'), weight[1] otherwise; weight[2] is used
            for the Y gap when bbox1 lies entirely above bbox2 (y2 < y1'),
            weight[3] otherwise. Defaults to [1.0, 1.0, 1.0, 1.0].

    Returns:
        float: 0.0 when both projection overlap ratios are positive, otherwise
        the sum of the weighted X and Y gaps.
    """
    # Build the default per call instead of sharing one mutable list object
    # across every invocation.
    if weight is None:
        weight = [1.0, 1.0, 1.0, 1.0]

    x1, y1, x2, y2 = bbox1
    x1_prime, y1_prime, x2_prime, y2_prime = bbox2
    min_x_distance, min_y_distance = 0, 0

    # NOTE(review): presumably "horizontal"/"vertical" select the x-/y-axis
    # projection of the two boxes -- confirm against the helper in ..utils.
    horizontal_iou = calculate_projection_overlap_ratio(bbox1, bbox2, "horizontal")
    vertical_iou = calculate_projection_overlap_ratio(bbox1, bbox2, "vertical")

    # Overlap on both projections: treat the boxes as touching.
    if horizontal_iou > 0 and vertical_iou > 0:
        return 0.0

    if horizontal_iou == 0:
        # Smallest distance between opposing vertical edges, scaled by the
        # weight that matches the side bbox1 is on.
        min_x_distance = min(abs(x1 - x2_prime), abs(x2 - x1_prime)) * (
            weight[0] if x2 < x1_prime else weight[1]
        )
    if vertical_iou == 0:
        # Same computation for the opposing horizontal edges.
        min_y_distance = min(abs(y1 - y2_prime), abs(y2 - y1_prime)) * (
            weight[2] if y2 < y1_prime else weight[3]
        )

    return min_x_distance + min_y_distance
|
61
|
+
|
62
|
+
|
63
|
+
def projection_by_bboxes(boxes: np.ndarray, axis: int) -> np.ndarray:
    """
    Build a 1D coverage histogram of bounding boxes along one axis.

    Args:
        boxes: A (N, 4) array of bounding boxes defined by [x_min, y_min, x_max, y_max].
        axis: 0 to project onto the x-axis, 1 to project onto the y-axis.

    Returns:
        A 1D integer array in which entry ``i`` counts the boxes whose
        [abs(start), abs(end)) interval on the chosen axis contains ``i``.
    """
    assert axis in [0, 1]

    # Columns `axis` and `axis + 2` are the start / end coordinate of each box.
    spans = boxes[:, axis::2]

    # When any coordinate is negative the histogram length is taken from the
    # magnitude of the smallest one; otherwise from the largest coordinate.
    lowest = np.min(spans)
    max_length = abs(lowest) if lowest < 0 else np.max(spans)

    projection = np.zeros(max_length, dtype=int)

    # Add one over the absolute-valued interval covered by every box.
    for begin, finish in spans:
        projection[abs(begin) : abs(finish)] += 1

    return projection
|
90
|
+
|
91
|
+
|
92
|
+
def split_projection_profile(arr_values: np.ndarray, min_value: float, min_gap: float):
    """
    Cut a projection profile into segments separated by sufficiently wide gaps.

    Args:
        arr_values: 1D array representing the projection profile.
        min_value: Only positions whose value is strictly greater than this count as occupied.
        min_gap: Two consecutive occupied positions start a new segment when
            their index difference is strictly greater than this.

    Returns:
        None when no position exceeds ``min_value``; otherwise a tuple
        ``(segment_starts, segment_ends)`` of index arrays. Every end except
        the last is the index of the last occupied position of its segment;
        the last end is one past the final occupied position.
    """
    # Positions of the profile that are above the threshold.
    occupied = np.where(arr_values > min_value)[0]
    if len(occupied) == 0:
        return None

    # breaks[k] is the position (within `occupied`) just before a wide gap.
    breaks = np.where(np.diff(occupied) > min_gap)[0]

    # A segment starts at the first occupied index and right after each gap.
    segment_starts = np.insert(occupied[breaks + 1], 0, occupied[0])
    # A segment ends right before each gap and after the last occupied index.
    segment_ends = np.append(occupied[breaks], occupied[-1] + 1)

    return segment_starts, segment_ends
|
125
|
+
|
126
|
+
|
127
|
+
def recursive_yx_cut(
    boxes: np.ndarray, indices: List[int], res: List[int], min_gap: int = 1
):
    """
    Order bounding boxes by recursively cutting along Y first, then X.

    Args:
        boxes: A (N, 4) array representing bounding boxes.
        indices: Original positions of the boxes; must be as long as ``boxes``.
        res: Output list; ordered indices are appended to it.
        min_gap (int): Gap threshold used when splitting the X-axis projection
            of each Y band. The Y split itself always uses a gap of 1. Defaults to 1.

    Returns:
        None: This function modifies the `res` list in place.
    """
    assert len(boxes) == len(
        indices
    ), "The length of boxes and indices must be the same."

    # Arrange boxes top-to-bottom and keep their original indices aligned.
    order_by_top = boxes[:, 1].argsort()
    rows = boxes[order_by_top]
    row_ids = np.array(indices)[order_by_top]

    # Split the Y projection into horizontal bands.
    row_bands = split_projection_profile(
        projection_by_bboxes(boxes=rows, axis=1), 0, 1
    )
    if not row_bands:
        return

    tops = rows[:, 1]
    for band_top, band_bottom in zip(*row_bands):
        # Boxes whose y_min falls inside the current band.
        in_band = (band_top <= tops) & (tops < band_bottom)
        band_boxes = rows[in_band]
        band_ids = row_ids[in_band]

        # Arrange the band's boxes by x_min.
        order_by_left = band_boxes[:, 0].argsort()
        cols = band_boxes[order_by_left]
        col_ids = band_ids[order_by_left]

        # Split the band's X projection into vertical strips.
        col_bands = split_projection_profile(
            projection_by_bboxes(boxes=cols, axis=0), 0, min_gap
        )
        if not col_bands:
            continue

        # A single strip means no further X cut is possible: emit as ordered.
        if len(col_bands[0]) == 1:
            res.extend(col_ids)
            continue

        # With negative x coordinates the strips are visited in reverse order.
        if np.min(cols[:, 0]) < 0:
            col_bands = np.flip(col_bands, axis=1)

        lefts = abs(cols[:, 0])
        for strip_left, strip_right in zip(*col_bands):
            in_strip = (strip_left <= lefts) & (lefts < strip_right)
            # NOTE(review): min_gap is not forwarded, so nested levels fall
            # back to the default of 1 -- confirm this is intended.
            recursive_yx_cut(cols[in_strip], col_ids[in_strip], res)
|
196
|
+
|
197
|
+
|
198
|
+
def recursive_xy_cut(
    boxes: np.ndarray, indices: List[int], res: List[int], min_gap: int = 1
):
    """
    Order bounding boxes by recursively cutting along X first, then Y.

    Args:
        boxes: A (N, 4) array representing bounding boxes with [x_min, y_min, x_max, y_max].
        indices: Original positions of the boxes; must be as long as ``boxes``.
        res: Output list; ordered indices are appended to it.
        min_gap (int): Gap threshold used when splitting the Y-axis projection
            of each X strip. The X split itself always uses a gap of 1. Defaults to 1.

    Returns:
        None: This function modifies the `res` list in place.
    """
    assert len(boxes) == len(
        indices
    ), "The length of boxes and indices must be the same."

    # Arrange boxes by x_min and keep their original indices aligned.
    order_by_left = boxes[:, 0].argsort()
    cols = boxes[order_by_left]
    col_ids = np.array(indices)[order_by_left]

    # Split the X projection into vertical strips.
    col_bands = split_projection_profile(
        projection_by_bboxes(boxes=cols, axis=0), 0, 1
    )
    if not col_bands:
        return

    # With negative x coordinates the strips are visited in reverse order.
    if np.min(cols[:, 0]) < 0:
        col_bands = np.flip(col_bands, axis=1)

    lefts = abs(cols[:, 0])
    for strip_left, strip_right in zip(*col_bands):
        # Boxes whose |x_min| falls inside the current strip.
        in_strip = (strip_left <= lefts) & (lefts < strip_right)
        strip_boxes = cols[in_strip]
        strip_ids = col_ids[in_strip]

        # Arrange the strip's boxes top-to-bottom.
        order_by_top = strip_boxes[:, 1].argsort()
        rows = strip_boxes[order_by_top]
        row_ids = strip_ids[order_by_top]

        # Split the strip's Y projection into horizontal bands.
        row_bands = split_projection_profile(
            projection_by_bboxes(boxes=rows, axis=1), 0, min_gap
        )
        if not row_bands:
            continue

        # A single band means no further Y cut is possible: emit as ordered.
        if len(row_bands[0]) == 1:
            res.extend(row_ids)
            continue

        tops = rows[:, 1]
        for band_top, band_bottom in zip(*row_bands):
            in_band = (band_top <= tops) & (tops < band_bottom)
            # NOTE(review): min_gap is not forwarded, so nested levels fall
            # back to the default of 1 -- confirm this is intended.
            recursive_xy_cut(rows[in_band], row_ids[in_band], res)
|
268
|
+
|
269
|
+
|
270
|
+
def reference_insert(
    block: LayoutBlock,
    sorted_blocks: List[LayoutBlock],
    **kwargs,
):
    """
    Insert a reference block after the best-scoring sorted block that ends above it.

    Only sorted blocks whose bottom edge (bbox[3]) is not below the top edge
    (bbox[1]) of ``block`` are considered. Among them the one with the largest
    ``bbox[2] * 10 + bbox[3]`` wins; on ties the earliest one is kept. When no
    block qualifies, index 0 is used as the anchor.

    Args:
        block: The block to insert into the sorted blocks.
        sorted_blocks: The sorted blocks where the new block will be inserted (modified in place).
        **kwargs: Accepted but not used by this function.

    Returns:
        sorted_blocks: The updated sorted blocks after insertion.
    """
    anchor_index = 0
    best_score = float("inf")

    for position, candidate in enumerate(sorted_blocks):
        if candidate.bbox[3] <= block.bbox[1]:
            # Negated so that a larger right/bottom extent gives a smaller score.
            score = -(candidate.bbox[2] * 10 + candidate.bbox[3])
            if score < best_score:
                best_score, anchor_index = score, position

    # The new block goes immediately after the chosen anchor.
    sorted_blocks.insert(anchor_index + 1, block)
    return sorted_blocks
|
298
|
+
|
299
|
+
|
300
|
+
def manhattan_insert(
    block: LayoutBlock,
    sorted_blocks: List[LayoutBlock],
    **kwargs,
):
    """
    Insert a block right after the sorted block with the smallest Manhattan distance to it.

    Distances are computed by ``_manhattan_distance`` on the two bboxes; on
    ties the earliest sorted block is kept. With an empty list, index 0 is
    used as the anchor.

    Args:
        block: The block to insert into the sorted blocks.
        sorted_blocks: The sorted blocks where the new block will be inserted (modified in place).
        **kwargs: Accepted but not used by this function.

    Returns:
        sorted_blocks: The updated sorted blocks after insertion.
    """
    closest_index = 0
    closest_distance = float("inf")

    for position, candidate in enumerate(sorted_blocks):
        gap = _manhattan_distance(block.bbox, candidate.bbox)
        if gap < closest_distance:
            closest_distance, closest_index = gap, position

    # The new block goes immediately after its nearest neighbour.
    sorted_blocks.insert(closest_index + 1, block)
    return sorted_blocks
|
327
|
+
|
328
|
+
|
329
|
+
def euclidean_insert(
    block: LayoutRegion,
    sorted_blocks: List[LayoutRegion],
    **kwargs,
):
    """
    Insert a block before the first sorted block with a larger ``euclidean_distance``.

    If no sorted block has a strictly larger ``euclidean_distance`` than
    ``block``, the block is appended at the end.

    Args:
        block: The block to insert into the sorted blocks.
        sorted_blocks: The sorted blocks where the new block will be inserted (modified in place).
        **kwargs: Accepted but not used by this function.

    Returns:
        sorted_blocks: The updated sorted blocks after insertion.
    """
    target = block.euclidean_distance

    # Position of the first entry that lies strictly farther than the new
    # block; fall back to the end of the list when there is none.
    insert_at = next(
        (
            position
            for position, other in enumerate(sorted_blocks)
            if other.euclidean_distance > target
        ),
        len(sorted_blocks),
    )

    sorted_blocks.insert(insert_at, block)
    return sorted_blocks
|
355
|
+
|
356
|
+
|
357
|
+
def weighted_distance_insert(
    block: LayoutBlock,
    sorted_blocks: List[LayoutBlock],
    region: LayoutRegion,
):
    """
    Insert a block next to the already-sorted block with the smallest weighted distance.

    For every sorted block a score is computed from the weighted nearest edge
    distance plus signed "up" and "left" edge terms. Each time a new best score
    is found, the insertion index is set before or after that block, with an
    extra shift for vision blocks depending on ``get_seg_flag``.

    Args:
        block: The block to insert into the sorted blocks.
        sorted_blocks: The sorted blocks where the new block will be inserted.
            Modified in place.
        region: The layout region; its ``direction`` and ``text_line_width``
            are read.

    Returns:
        sorted_blocks: The same list object, after insertion.
    """
    horizontal_region = region.direction == "horizontal"
    tolerance_len = XYCUT_SETTINGS["edge_distance_compare_tolerance_len"]
    x1, y1, x2, y2 = block.bbox
    best_score = float("inf")
    smallest_up_edge = float("inf")
    insert_at = 0

    for placed_idx, placed in enumerate(sorted_blocks):
        px1, py1, px2, py2 = placed.bbox

        # Weighted nearest edge distance between the two boxes.
        edge_weights = _get_weights(block.order_label, block.direction)
        edge_distance = get_nearest_edge_distance(
            block.bbox, placed.bbox, edge_weights
        )

        # NOTE(review): tolerance_len is not reset between iterations, so the
        # adjustments below accumulate across the loop — confirm this is intended.
        if block.label in BLOCK_LABEL_MAP["doc_title_labels"]:
            tolerance_len = max(tolerance_len, max(1, region.text_line_width))
        if block.label == "abstract":
            tolerance_len *= 2
            edge_distance = max(0.1, edge_distance) * 10

        # Signed positional terms taken from the sorted block.
        if horizontal_region:
            up_edge_distance = py1
            left_edge_distance = px1
            block_is_below = py2 < y1
        else:
            up_edge_distance = -px2
            left_edge_distance = py1
            block_is_below = px1 > x2

        if (
            block.label not in BLOCK_LABEL_MAP["unordered_labels"]
            or block.label in BLOCK_LABEL_MAP["doc_title_labels"]
            or block.label in BLOCK_LABEL_MAP["paragraph_title_labels"]
            or block.label in BLOCK_LABEL_MAP["vision_labels"]
        ) and block_is_below:
            up_edge_distance = -up_edge_distance
            left_edge_distance = -left_edge_distance

        # Snap to the running minimum when within tolerance of it.
        if abs(smallest_up_edge - up_edge_distance) <= tolerance_len:
            up_edge_distance = smallest_up_edge

        weight_map = XYCUT_SETTINGS["distance_weight_map"]
        score = (
            +edge_distance * weight_map.get("edge_weight", 10**4)
            + up_edge_distance * weight_map.get("up_edge_weight", 1)
            + left_edge_distance * weight_map.get("left_edge_weight", 0.0001)
        )

        smallest_up_edge = min(up_edge_distance, smallest_up_edge)

        if score < best_score:
            insert_at = placed_idx
            best_score = score

            # Choose the pair of values that decides "before" versus "after".
            if abs(y1 // 2 - py1 // 2) > 0:
                placed_key, block_key = py1, y1
            elif horizontal_region:
                # NOTE(review): this compares the inserted block's own x1 and x2,
                # not x1 against the sorted block's x1 — confirm.
                if abs(x1 // 2 - x2 // 2) > 0:
                    placed_key, block_key = px1, x1
                else:
                    # Squared centroid distance to (0, 0).
                    placed_cx, placed_cy = placed.get_centroid()
                    block_cx, block_cy = block.get_centroid()
                    placed_key = placed_cx**2 + placed_cy**2
                    block_key = block_cx**2 + block_cy**2
            else:
                if abs(x1 - x2) > 0:
                    placed_key, block_key = -px2, -x2
                else:
                    # The original comment says "distance with (max,0)"; the
                    # expression is the same squared distance to (0, 0).
                    placed_cx, placed_cy = placed.get_centroid()
                    block_cx, block_cy = block.get_centroid()
                    placed_key = placed_cx**2 + placed_cy**2
                    block_key = block_cx**2 + block_cy**2

            if block_key > placed_key:
                insert_at = placed_idx + 1
                if (
                    placed_idx < len(sorted_blocks) - 1
                    and block.label
                    in BLOCK_LABEL_MAP["vision_labels"]
                    + BLOCK_LABEL_MAP["vision_title_labels"]
                ):
                    seg_start_flag, _ = get_seg_flag(
                        sorted_blocks[placed_idx + 1],
                        sorted_blocks[placed_idx],
                    )
                    if not seg_start_flag:
                        insert_at += 1
            elif (
                placed_idx > 0
                and block.label
                in BLOCK_LABEL_MAP["vision_labels"]
                + BLOCK_LABEL_MAP["vision_title_labels"]
            ):
                seg_start_flag, _ = get_seg_flag(
                    sorted_blocks[placed_idx],
                    sorted_blocks[placed_idx - 1],
                )
                if not seg_start_flag:
                    insert_at = placed_idx - 1

    sorted_blocks.insert(insert_at, block)
    return sorted_blocks
|
495
|
+
|
496
|
+
|
497
|
+
def insert_child_blocks(
    block: LayoutBlock,
    block_idx: int,
    sorted_blocks: List[LayoutBlock],
) -> List[LayoutBlock]:
    """
    Expand a parent block in the sorted list into the parent plus its children.

    When the parent has child blocks, the parent is appended to the list
    returned by ``get_child_blocks()``, that group is ordered with
    ``sort_child_blocks``, and the ordered group takes the parent's slot:
    the first member overwrites ``sorted_blocks[block_idx]`` and the rest are
    inserted right after it.

    Args:
        block: The parent block whose child blocks need to be inserted.
        block_idx: Index at which the parent block exists in the sorted blocks list.
        sorted_blocks: Sorted blocks list, modified in place.

    Returns:
        sorted_blocks: The same list object after inserting child blocks.
    """
    if not block.child_blocks:
        return sorted_blocks

    family = block.get_child_blocks()
    family.append(block)
    family = sort_child_blocks(family, family[0].direction)

    sorted_blocks[block_idx] = family[0]
    for offset, member in enumerate(family[1:], start=1):
        sorted_blocks.insert(block_idx + offset, member)
    return sorted_blocks
|
522
|
+
|
523
|
+
|
524
|
+
def sort_child_blocks(
    blocks: List[LayoutRegion], direction="horizontal"
) -> List[LayoutBlock]:
    """
    Sort child blocks in place and return the same list.

    The ordering depends on the label of the first element:

    * first label is not "region", ``direction == "horizontal"``: sort by
      ``(bbox[1], bbox[0], cx**2 + cy**2)``;
    * first label is not "region", any other direction: sort by
      ``(-bbox[2], bbox[1], -(cx**2) + cy**2)``;
    * first label is "region": sort by ``euclidean_distance``.

    Args:
        blocks: The child blocks to sort. An empty list is returned unchanged.
        direction: direction of the blocks ('horizontal' or 'vertical'). Default is 'horizontal'.

    Returns:
        sorted_blocks: The same list object, sorted.
    """
    # Guard: blocks[0] below would raise IndexError on an empty list.
    if not blocks:
        return blocks

    if blocks[0].label == "region":
        blocks.sort(key=lambda item: item.euclidean_distance)
        return blocks

    def _horizontal_key(item):
        centroid = item.get_centroid()
        # Third component: squared centroid distance to (0, 0).
        return (item.bbox[1], item.bbox[0], centroid[0] ** 2 + centroid[1] ** 2)

    def _vertical_key(item):
        centroid = item.get_centroid()
        # Unary minus binds looser than **, so this is -(cx**2) + cy**2.
        return (-item.bbox[2], item.bbox[1], -centroid[0] ** 2 + centroid[1] ** 2)

    blocks.sort(key=_horizontal_key if direction == "horizontal" else _vertical_key)
    return blocks
|
556
|
+
|
557
|
+
|
558
|
+
def _get_weights(label, direction="horizontal"):
|
559
|
+
"""Define weights based on the label and direction."""
|
560
|
+
if label == "doc_title":
|
561
|
+
return (
|
562
|
+
[1, 0.1, 0.1, 1] if direction == "horizontal" else [0.2, 0.1, 1, 1]
|
563
|
+
) # left-down , right-left
|
564
|
+
elif label in [
|
565
|
+
"paragraph_title",
|
566
|
+
"table_title",
|
567
|
+
"abstract",
|
568
|
+
"image",
|
569
|
+
"seal",
|
570
|
+
"chart",
|
571
|
+
"figure",
|
572
|
+
]:
|
573
|
+
return [1, 1, 0.1, 1] # down
|
574
|
+
else:
|
575
|
+
return [1, 1, 1, 0.1] # up
|
576
|
+
|
577
|
+
|
578
|
+
def _manhattan_distance(
|
579
|
+
point1: Tuple[float, float],
|
580
|
+
point2: Tuple[float, float],
|
581
|
+
weight_x: float = 1.0,
|
582
|
+
weight_y: float = 1.0,
|
583
|
+
) -> float:
|
584
|
+
"""
|
585
|
+
Calculate the weighted Manhattan distance between two points.
|
586
|
+
|
587
|
+
Args:
|
588
|
+
point1 (Tuple[float, float]): The first point as (x, y).
|
589
|
+
point2 (Tuple[float, float]): The second point as (x, y).
|
590
|
+
weight_x (float): The weight for the x-axis distance. Default is 1.0.
|
591
|
+
weight_y (float): The weight for the y-axis distance. Default is 1.0.
|
592
|
+
|
593
|
+
Returns:
|
594
|
+
float: The weighted Manhattan distance between the two points.
|
595
|
+
"""
|
596
|
+
return weight_x * abs(point1[0] - point2[0]) + weight_y * abs(point1[1] - point2[1])
|
597
|
+
|
598
|
+
|
599
|
+
def sort_normal_blocks(
    blocks, text_line_height, text_line_width, region_direction
) -> List[LayoutBlock]:
    """Sort blocks in place by quantized position and return the same list.

    For a "horizontal" region the key is
    ``(bbox[1] // text_line_height, bbox[0] // text_line_width, cx**2 + cy**2)``;
    for any other direction it is
    ``(-bbox[2] // text_line_width, bbox[1] // text_line_height, -(cx**2) + cy**2)``.

    Args:
        blocks (List[LayoutBlock]): List of blocks to be sorted.
        text_line_height (int): Height of each line of text (used as a divisor).
        text_line_width (int): Width of each line of text (used as a divisor).
        region_direction (str): Direction of the region, either "horizontal" or "vertical".

    Returns:
        List[LayoutBlock]: The same list object, sorted.
    """

    def _row_major_key(item):
        centroid = item.get_centroid()
        return (
            item.bbox[1] // text_line_height,
            item.bbox[0] // text_line_width,
            centroid[0] ** 2 + centroid[1] ** 2,
        )

    def _column_major_key(item):
        centroid = item.get_centroid()
        return (
            # Negation is applied before the floor division.
            -item.bbox[2] // text_line_width,
            item.bbox[1] // text_line_height,
            # ** binds tighter than unary minus: -(cx**2) + cy**2.
            -centroid[0] ** 2 + centroid[1] ** 2,
        )

    if region_direction == "horizontal":
        blocks.sort(key=_row_major_key)
    else:
        blocks.sort(key=_column_major_key)
    return blocks
|
630
|
+
|
631
|
+
|
632
|
+
def get_cut_blocks(blocks, cut_direction, cut_coordinates, mask_labels=None):
    """
    Group blocks by the cut interval that contains their far edge.

    ``blocks`` is sorted in place by ``bbox[2]`` ("horizontal") or ``bbox[3]``
    (any other direction). The cut coordinates are de-duplicated, sorted and
    closed with ``+inf``; each block goes into the group of the first
    coordinate that is not smaller than its far edge. Blocks whose
    ``order_label`` is in ``mask_labels`` are consumed but left out of the
    result, and empty groups are dropped.

    Args:
        blocks (list): list of blocks to be cut. Sorted in place.
        cut_direction (str): cut direction, either "horizontal" or "vertical".
        cut_coordinates (list): list of cut coordinates. Not modified.
        mask_labels (list, optional): ``order_label`` values to exclude.
            Defaults to no exclusions.

    Returns:
        list: a list of non-empty lists of blocks, one per cut interval.
    """
    excluded = () if mask_labels is None else mask_labels

    # 0: horizontal, 1: vertical; "+ 2" selects the far edge of the bbox.
    edge_idx = (0 if cut_direction == "horizontal" else 1) + 2
    blocks.sort(key=lambda item: item.bbox[edge_idx])

    # Build a private boundary list so the caller's list never grows.
    boundaries = sorted({*cut_coordinates, float("inf")})

    groups = []
    cursor = 0
    total = len(blocks)
    for boundary in boundaries:
        group = []
        while cursor < total:
            candidate = blocks[cursor]
            if candidate.bbox[edge_idx] > boundary:
                break
            if candidate.order_label not in excluded:
                group.append(candidate)
            cursor += 1
        if group:
            groups.append(group)

    return groups
|
671
|
+
|
672
|
+
|
673
|
+
def get_blocks_by_direction_interval(
    blocks: List[LayoutBlock],
    start_index: int,
    end_index: int,
    direction: str = "horizontal",
) -> List[LayoutBlock]:
    """
    Select the blocks that lie entirely inside a coordinate interval.

    ``blocks`` is first sorted in place by the far edge on the chosen axis
    (``bbox[2]`` for "horizontal", ``bbox[3]`` otherwise). A block is kept when
    its near edge is ``>= start_index`` and its far edge is ``<= end_index``.

    Args:
        blocks (List[LayoutBlock]): A list of blocks. Sorted in place.
        start_index (int): Lower bound of the interval (inclusive).
        end_index (int): Upper bound of the interval (inclusive).
        direction (str, optional): The direction to consider. Defaults to "horizontal".

    Returns:
        List[LayoutBlock]: A new list of the blocks within the interval, in sorted order.
    """
    near = 0 if direction == "horizontal" else 1
    far = near + 2
    blocks.sort(key=lambda item: item.bbox[far])

    return [
        item
        for item in blocks
        if item.bbox[near] >= start_index and item.bbox[far] <= end_index
    ]
|
700
|
+
|
701
|
+
|
702
|
+
def get_nearest_blocks(
    block: LayoutBlock,
    ref_blocks: List[LayoutBlock],
    overlap_threshold,
    direction="horizontal",
) -> List:
    """
    Split the reference blocks that overlap ``block`` into preceding and following ones.

    A reference block is kept when its projection overlap ratio with ``block``
    (``calculate_projection_overlap_ratio`` with ``mode="small"``) exceeds
    ``overlap_threshold``. Kept blocks are compared on ``bbox[1]`` for
    "horizontal" and ``bbox[0]`` otherwise: a value less than or equal to
    ``block``'s makes it a previous block, a larger value a following block.

    Args:
        block (LayoutBlock): The current block.
        ref_blocks (List[LayoutBlock]): Candidate blocks; the entry with the
            same ``index`` as ``block`` is skipped.
        overlap_threshold: Minimum (exclusive) projection overlap ratio.
        direction (str): Direction passed to the overlap computation.

    Returns:
        tuple: ``(prev_blocks, post_blocks)``. ``prev_blocks`` is sorted by the
        comparison coordinate in descending order, ``post_blocks`` in ascending
        order, so element 0 of each is the extreme one on ``block``'s side.
    """
    coord = 1 if direction == "horizontal" else 0
    before: List[LayoutBlock] = []
    after: List[LayoutBlock] = []

    for candidate in ref_blocks:
        if candidate.index == block.index:
            continue
        ratio = calculate_projection_overlap_ratio(
            block.bbox, candidate.bbox, direction, mode="small"
        )
        if not ratio > overlap_threshold:
            continue
        bucket = before if candidate.bbox[coord] <= block.bbox[coord] else after
        bucket.append(candidate)

    before.sort(key=lambda item: item.bbox[coord], reverse=True)
    after.sort(key=lambda item: item.bbox[coord])

    return before, after
|
740
|
+
|
741
|
+
|
742
|
+
def update_doc_title_child_blocks(
    block: LayoutBlock,
    region: LayoutRegion,
) -> None:
    """
    Attach qualifying normal-text blocks to a document title block as children.

    Two passes are made over the region's normal text blocks:

    1. The first previous and first following block returned by
       ``get_nearest_blocks`` are adopted when all of the following hold:
       same direction as the title; label is a text label; short side below
       80% of the title's short side; long side either shorter than the
       title's long side or longer than 1.5 times it; fewer than 3 lines;
       nearest edge distance below twice the candidate's text line height.
    2. Every remaining block with the same direction whose overlap ratio with
       the title (``mode="small"``) exceeds 0.9 is adopted.

    An adopted block gets ``order_label = "doc_title_text"``, is appended to
    the title's children, and its index is removed from
    ``region.normal_text_block_idxes``.

    Args:
        block (LayoutBlock): document title block.
        region (LayoutRegion): region providing ``block_map`` and
            ``normal_text_block_idxes``; the latter is modified in place.

    Returns:
        None
    """

    def _adopt(child):
        child.order_label = "doc_title_text"
        block.append_child_block(child)
        region.normal_text_block_idxes.remove(child.index)

    text_blocks = [region.block_map[idx] for idx in region.normal_text_block_idxes]
    overlap_threshold = XYCUT_SETTINGS["child_block_overlap_ratio_threshold"]
    prev_blocks, post_blocks = get_nearest_blocks(
        block, text_blocks, overlap_threshold, block.direction
    )

    # Pass 1: only the first neighbour on each side is considered.
    neighbours = [group[0] for group in (prev_blocks, post_blocks) if group]
    for candidate in neighbours:
        same_direction = candidate.direction == block.direction
        short_side_ok = candidate.short_side_length < block.short_side_length * 0.8
        long_side_ok = (
            candidate.long_side_length < block.long_side_length
            or candidate.long_side_length > 1.5 * block.long_side_length
        )
        edge_gap = get_nearest_edge_distance(block.bbox, candidate.bbox)

        if (
            same_direction
            and candidate.label in BLOCK_LABEL_MAP["text_labels"]
            and short_side_ok
            and long_side_ok
            and candidate.num_of_lines < 3
            and edge_gap < candidate.text_line_height * 2
        ):
            _adopt(candidate)

    # Pass 2: blocks almost entirely overlapping the title.
    for candidate in text_blocks:
        if candidate.order_label == "doc_title_text":
            continue
        same_direction = candidate.direction == block.direction
        overlap_ratio = calculate_overlap_ratio(
            block.bbox, candidate.bbox, mode="small"
        )
        if overlap_ratio > 0.9 and same_direction:
            _adopt(candidate)
|
822
|
+
|
823
|
+
|
824
|
+
def update_paragraph_title_child_blocks(
    block: LayoutBlock,
    region: LayoutRegion,
) -> None:
    """
    Attach neighbouring paragraph-title blocks to a paragraph title as sub-titles.

    Nothing happens when ``block`` is itself already a "sub_paragraph_title".
    Otherwise the previous and following neighbours (from
    ``get_nearest_blocks`` over the region's paragraph-title and normal-text
    blocks) are walked outward; each walk stops at the first block whose label
    is not a paragraph-title label. A neighbour is adopted when:

    1. it has the same direction as ``block``;
    2. its ``start_coordinate`` differs from ``block``'s by less than twice
       the smaller of the two text line heights;
    3. its nearest edge distance to ``block`` is at most 1.5 times that
       smaller text line height.

    An adopted block gets ``order_label = "sub_paragraph_title"``, is appended
    to ``block``'s children, and its index is removed from
    ``region.paragraph_title_block_idxes``.

    Args:
        block (LayoutBlock): paragraph title block.
        region (LayoutRegion): region providing ``block_map`` and the index
            lists; ``paragraph_title_block_idxes`` is modified in place.

    Returns:
        None
    """
    if block.order_label == "sub_paragraph_title":
        return

    candidate_idxes = (
        region.paragraph_title_block_idxes + region.normal_text_block_idxes
    )
    candidates = [region.block_map[idx] for idx in candidate_idxes]
    overlap_threshold = XYCUT_SETTINGS["child_block_overlap_ratio_threshold"]
    prev_blocks, post_blocks = get_nearest_blocks(
        block, candidates, overlap_threshold, block.direction
    )

    for neighbours in (prev_blocks, post_blocks):
        for neighbour in neighbours:
            # A non-title block ends the walk on this side.
            if neighbour.label not in BLOCK_LABEL_MAP["paragraph_title_labels"]:
                break

            line_height = min(block.text_line_height, neighbour.text_line_height)
            edge_gap = get_nearest_edge_distance(block.bbox, neighbour.bbox)
            same_direction = neighbour.direction == block.direction
            same_start = (
                abs(neighbour.start_coordinate - block.start_coordinate)
                < line_height * 2
            )

            if same_direction and same_start and edge_gap <= line_height * 1.5:
                neighbour.order_label = "sub_paragraph_title"
                block.append_child_block(neighbour)
                region.paragraph_title_block_idxes.remove(neighbour.index)
|
880
|
+
|
881
|
+
|
882
|
+
def update_vision_child_blocks(
    block: LayoutBlock,
    region: LayoutRegion,
) -> None:
    """
    Attach vision titles and footnote-like text blocks to a vision block.

    Candidates are the region's normal-text and vision-title blocks. For the
    block's ``direction`` and then its ``secondary_direction``, the previous
    and following neighbours from ``get_nearest_blocks`` are examined:

    - A vision-title neighbour whose nearest edge distance is at most twice
      its text line height becomes a child with ``order_label = "vision_title"``
      and is removed from ``region.vision_title_block_idxes``.
    - A text neighbour can become the (single) footnote when no footnote has
      been found yet, it has the same direction, its long side is shorter than
      the vision block's, it is within twice its text line height, and one of
      these holds: it is smaller on both sides (long side under 50%) with
      centre x within 10 of the vision block's; or it is a single line with
      ``block.bbox[0] - ref.bbox[0] < 10``; or it is a single line with
      ``block.bbox[2] - ref.bbox[2] < 10``.
    - Each walk stops at the first text neighbour, adopted or not.

    The direction loop ends early once a vision title has been attached.
    Finally, every candidate still listed in ``region.normal_text_block_idxes``
    whose overlap ratio with the vision block (``mode="small"``) exceeds 0.9
    is attached as a footnote.

    Args:
        block (LayoutBlock): vision block.
        region (LayoutRegion): region providing ``block_map`` and the index
            lists, which are modified in place.

    Returns:
        None
    """

    def _is_text(candidate):
        return candidate.label in BLOCK_LABEL_MAP["text_labels"]

    def _is_vision_title(candidate):
        return candidate.label in BLOCK_LABEL_MAP["vision_title_labels"]

    def _within_reach(candidate, edge_gap):
        return edge_gap <= candidate.text_line_height * 2

    def _footnote_shaped(candidate, own_center, candidate_center):
        small_and_centered = (
            candidate.short_side_length < block.short_side_length
            and candidate.long_side_length < 0.5 * block.long_side_length
            and abs(own_center[0] - candidate_center[0]) < 10
        )
        left_aligned_line = (
            block.bbox[0] - candidate.bbox[0] < 10 and candidate.num_of_lines == 1
        )
        right_aligned_line = (
            block.bbox[2] - candidate.bbox[2] < 10 and candidate.num_of_lines == 1
        )
        return small_and_centered or left_aligned_line or right_aligned_line

    candidates = [
        region.block_map[idx]
        for idx in region.normal_text_block_idxes + region.vision_title_block_idxes
    ]
    overlap_threshold = XYCUT_SETTINGS["child_block_overlap_ratio_threshold"]
    has_vision_footnote = False
    has_vision_title = False

    for direction in [block.direction, block.secondary_direction]:
        prev_blocks, post_blocks = get_nearest_blocks(
            block, candidates, overlap_threshold, direction
        )

        for neighbour in prev_blocks:
            if not (_is_text(neighbour) or _is_vision_title(neighbour)):
                break
            edge_gap = get_nearest_edge_distance(block.bbox, neighbour.bbox)
            own_center = block.get_centroid()
            neighbour_center = neighbour.get_centroid()

            if _is_vision_title(neighbour) and _within_reach(neighbour, edge_gap):
                has_vision_title = True
                neighbour.order_label = "vision_title"
                block.append_child_block(neighbour)
                region.vision_title_block_idxes.remove(neighbour.index)

            if _is_text(neighbour):
                if (
                    not has_vision_footnote
                    and neighbour.direction == block.direction
                    and neighbour.long_side_length < block.long_side_length
                    and _within_reach(neighbour, edge_gap)
                    and _footnote_shaped(neighbour, own_center, neighbour_center)
                ):
                    has_vision_footnote = True
                    # Only order_label changes here; label is left untouched.
                    neighbour.order_label = "vision_footnote"
                    block.append_child_block(neighbour)
                    region.normal_text_block_idxes.remove(neighbour.index)
                # The first text block ends the walk on this side.
                break

        for neighbour in post_blocks:
            if has_vision_footnote and _is_text(neighbour):
                break
            edge_gap = get_nearest_edge_distance(block.bbox, neighbour.bbox)
            own_center = block.get_centroid()
            neighbour_center = neighbour.get_centroid()

            if _is_vision_title(neighbour) and _within_reach(neighbour, edge_gap):
                has_vision_title = True
                neighbour.order_label = "vision_title"
                block.append_child_block(neighbour)
                region.vision_title_block_idxes.remove(neighbour.index)

            if _is_text(neighbour):
                if (
                    not has_vision_footnote
                    and neighbour.direction == block.direction
                    and neighbour.long_side_length < block.long_side_length
                    and _within_reach(neighbour, edge_gap)
                    and _footnote_shaped(neighbour, own_center, neighbour_center)
                ):
                    has_vision_footnote = True
                    # On this side both label and order_label are rewritten.
                    neighbour.label = "vision_footnote"
                    neighbour.order_label = "vision_footnote"
                    block.append_child_block(neighbour)
                    region.normal_text_block_idxes.remove(neighbour.index)
                break

        if has_vision_title:
            break

    # Text blocks almost entirely overlapping the vision block become footnotes.
    for candidate in candidates:
        if candidate.index not in region.normal_text_block_idxes:
            continue

        overlap_ratio = calculate_overlap_ratio(
            block.bbox, candidate.bbox, mode="small"
        )

        if overlap_ratio > 0.9:
            candidate.label = "vision_footnote"
            candidate.order_label = "vision_footnote"
            block.append_child_block(candidate)
            region.normal_text_block_idxes.remove(candidate.index)
|
1036
|
+
|
1037
|
+
|
1038
|
+
def update_region_child_blocks(
    block: LayoutBlock,
    region: LayoutRegion,
) -> None:
    """Attach smaller overlapping blocks to a block as sub-regions.

    Every other block in ``region.block_map`` that has a positive overlap
    ratio with ``block``, a smaller ``area``, and is not already labelled
    "sub_region" gets ``order_label = "sub_region"``, is appended to
    ``block``'s children, and has its index removed from
    ``region.normal_text_block_idxes``.

    Args:
        block (LayoutBlock): the block that receives the children.
        region (LayoutRegion): layout region; its index list is modified in place.

    Returns:
        None
    """
    for other in region.block_map.values():
        if block.index == other.index:
            continue

        overlap = calculate_overlap_ratio(block.bbox, other.bbox)
        is_new_sub_region = (
            overlap > 0
            and block.area > other.area
            and other.order_label != "sub_region"
        )
        if not is_new_sub_region:
            continue

        other.order_label = "sub_region"
        block.append_child_block(other)
        # NOTE(review): list.remove raises ValueError if the index is absent;
        # presumably every block reaching here is in normal_text_block_idxes — confirm.
        region.normal_text_block_idxes.remove(other.index)
|
1062
|
+
|
1063
|
+
|
1064
|
+
def calculate_discontinuous_projection(
    boxes, direction="horizontal", return_num=False
) -> List:
    """
    Project boxes onto one axis and merge the overlapping or touching intervals.

    Args:
        boxes (ndarray): Array of bounding boxes represented by [[x_min, y_min, x_max, y_max]].
            An empty input yields empty results.
        direction (str): 'horizontal' projects onto x (columns 0 and 2),
            'vertical' projects onto y (columns 1 and 3).
        return_num (bool): When True, also return how many boxes were merged
            into each interval.

    Returns:
        list: List of ``(start, end)`` tuples for the merged intervals, ordered
        by start. With ``return_num=True`` a tuple ``(merged_intervals, num_list)``
        is returned instead.

    Raises:
        ValueError: If ``direction`` is neither 'horizontal' nor 'vertical'.
    """
    boxes = np.array(boxes)
    if direction == "horizontal":
        columns = [0, 2]
    elif direction == "vertical":
        columns = [1, 3]
    else:
        raise ValueError("direction must be 'horizontal' or 'vertical'")

    # An empty array is 1-D, so the 2-D indexing below would fail on it.
    if boxes.size == 0:
        return ([], []) if return_num else []

    intervals = boxes[:, columns]
    intervals = intervals[np.argsort(intervals[:, 0])]

    merged_intervals = []
    num_list = []
    count = 1
    current_start, current_end = intervals[0]

    for start, end in intervals[1:]:
        if start <= current_end:
            # Still inside (or touching) the running interval: extend it.
            count += 1
            current_end = max(current_end, end)
        else:
            num_list.append(count)
            merged_intervals.append((current_start, current_end))
            count = 1
            current_start, current_end = start, end

    num_list.append(count)
    merged_intervals.append((current_start, current_end))

    if return_num:
        return merged_intervals, num_list
    return merged_intervals
|
1107
|
+
|
1108
|
+
|
1109
|
+
def shrink_overlapping_boxes(
    boxes, direction="horizontal", min_threshold=0, max_threshold=0.1
) -> List:
    """
    Pull apart consecutive boxes that slightly overlap or nearly touch.

    Each box is compared with the one after it. The pair is adjusted when
    either (a) they overlap on the cross axis and their projection overlap
    ratio along ``direction`` lies strictly between ``min_threshold`` and
    ``max_threshold``, or (b) the first box's far edge is within 3 units of
    the second box's near edge. The adjustment computes the integer midpoint
    of the overlap span and moves the facing edges to ``midpoint - 1`` and
    ``midpoint + 1``, assigning new ``bbox`` lists to both objects.

    Args:
        boxes: Sequence of objects with a ``bbox`` attribute
            ``[x_min, y_min, x_max, y_max]``; the objects are modified in place.
        direction (str): 'vertical' adjusts the y edges; any other value
            adjusts the x edges.
        min_threshold (float): Lower (exclusive) bound of the overlap ratio. Default is 0.
        max_threshold (float): Upper (exclusive) bound of the overlap ratio. Default is 0.1.

    Returns:
        list: The same ``boxes`` object that was passed in.
    """
    along_y = direction == "vertical"
    cross_direction = "horizontal" if along_y else "vertical"
    # Indices of the near and far edge on the axis being adjusted.
    near, far = (1, 3) if along_y else (0, 2)

    leading = boxes[0]
    for trailing in boxes[1:]:
        x1, y1, x2, y2 = leading.bbox
        x1_prime, y1_prime, x2_prime, y2_prime = trailing.bbox
        lead_box = [x1, y1, x2, y2]
        trail_box = [x1_prime, y1_prime, x2_prime, y2_prime]

        cut_iou = calculate_projection_overlap_ratio(
            leading.bbox, trailing.bbox, direction=direction
        )
        match_iou = calculate_projection_overlap_ratio(
            leading.bbox,
            trailing.bbox,
            direction=cross_direction,
        )

        slight_overlap = match_iou > 0 and min_threshold < cut_iou < max_threshold
        # Also covers the exactly-touching case (difference of 0).
        nearly_touching = abs(lead_box[far] - trail_box[near]) <= 3

        if slight_overlap or nearly_touching:
            span_start = max(lead_box[near], trail_box[near])
            span_end = min(lead_box[far], trail_box[far])
            split = int((span_start + span_end) / 2)

            if lead_box[near] < trail_box[near]:
                lead_box[far] = split - 1
                trail_box[near] = split + 1
            else:
                lead_box[near] = split - 1
                trail_box[far] = split + 1

            leading.bbox = lead_box
            trailing.bbox = trail_box

        leading = trailing
    return boxes
|
1172
|
+
|
1173
|
+
|
1174
|
+
def find_local_minima_flat_regions(arr) -> List:
    """
    Find flat runs of equal values that are local minima, skipping the first one found.

    A run counts when the value before it is larger (or the run starts at
    index 0) and the value after it is larger. The run that ends at the last
    element is never evaluated, because a run is only checked when a
    different value follows it.

    Args:
        arr (list): The input array.

    Returns:
        list: ``[]`` for an empty input. Otherwise the ``(start, end)`` index
        pairs (inclusive) of the detected runs with the first one dropped, or
        ``None`` when fewer than two runs were detected.
    """
    size = len(arr)
    if size == 0:
        return []

    regions = []
    run_start = 0

    for idx in range(1, size):
        if arr[idx] == arr[idx - 1]:
            continue
        # A new value begins at idx, closing the run [run_start, idx - 1].
        descends_into_run = run_start == 0 or arr[run_start - 1] > arr[run_start]
        if descends_into_run and arr[idx] > arr[run_start]:
            regions.append((run_start, idx - 1))
        run_start = idx

    if len(regions) > 1:
        return regions[1:]
    return None
|