paddlex 3.0.0rc1__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +1 -1
- paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
- paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
- paddlex/configs/pipelines/OCR.yaml +7 -6
- paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
- paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
- paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
- paddlex/configs/pipelines/doc_understanding.yaml +1 -1
- paddlex/configs/pipelines/formula_recognition.yaml +2 -2
- paddlex/configs/pipelines/layout_parsing.yaml +3 -2
- paddlex/configs/pipelines/seal_recognition.yaml +1 -0
- paddlex/configs/pipelines/table_recognition.yaml +2 -1
- paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
- paddlex/hpip_links.html +20 -20
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
- paddlex/inference/common/result/mixin.py +19 -12
- paddlex/inference/models/base/predictor/base_predictor.py +2 -8
- paddlex/inference/models/common/static_infer.py +11 -59
- paddlex/inference/models/common/tokenizer/__init__.py +2 -0
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
- paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
- paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
- paddlex/inference/models/common/tokenizer/vocab.py +7 -7
- paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
- paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
- paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
- paddlex/inference/models/common/vlm/generation/utils.py +1 -1
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
- paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
- paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
- paddlex/inference/models/doc_vlm/predictor.py +79 -24
- paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/processors/common.py +189 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
- paddlex/inference/models/formula_recognition/predictor.py +7 -1
- paddlex/inference/models/formula_recognition/processors.py +92 -79
- paddlex/inference/models/formula_recognition/result.py +28 -27
- paddlex/inference/models/image_feature/processors.py +3 -4
- paddlex/inference/models/keypoint_detection/predictor.py +3 -0
- paddlex/inference/models/object_detection/predictor.py +2 -0
- paddlex/inference/models/object_detection/processors.py +28 -3
- paddlex/inference/models/object_detection/utils.py +2 -0
- paddlex/inference/models/table_structure_recognition/result.py +0 -10
- paddlex/inference/models/text_detection/predictor.py +8 -0
- paddlex/inference/models/text_detection/processors.py +44 -10
- paddlex/inference/models/text_detection/result.py +0 -10
- paddlex/inference/pipelines/__init__.py +9 -5
- paddlex/inference/pipelines/_parallel.py +172 -0
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
- paddlex/inference/pipelines/base.py +14 -4
- paddlex/inference/pipelines/components/faisser.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
- paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
- paddlex/inference/pipelines/formula_recognition/result.py +1 -11
- paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +893 -260
- paddlex/inference/pipelines/layout_parsing/result.py +4 -17
- paddlex/inference/pipelines/layout_parsing/result_v2.py +523 -245
- paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +565 -1998
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
- paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
- paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ocr/pipeline.py +127 -70
- paddlex/inference/pipelines/ocr/result.py +19 -16
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +5 -5
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
- paddlex/inference/pipelines/table_recognition/result.py +1 -1
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
- paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
- paddlex/inference/serving/infra/utils.py +20 -22
- paddlex/inference/serving/schemas/formula_recognition.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
- paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
- paddlex/inference/serving/schemas/seal_recognition.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +2 -6
- paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
- paddlex/inference/utils/hpi.py +8 -1
- paddlex/inference/utils/hpi_model_info_collection.json +81 -2
- paddlex/inference/utils/io/readers.py +12 -12
- paddlex/inference/utils/mkldnn_blocklist.py +25 -0
- paddlex/inference/utils/official_models.py +14 -0
- paddlex/inference/utils/pp_option.py +29 -8
- paddlex/model.py +2 -2
- paddlex/modules/__init__.py +1 -1
- paddlex/modules/anomaly_detection/evaluator.py +2 -2
- paddlex/modules/base/__init__.py +1 -1
- paddlex/modules/base/evaluator.py +5 -5
- paddlex/modules/base/trainer.py +1 -1
- paddlex/modules/doc_vlm/dataset_checker.py +2 -2
- paddlex/modules/doc_vlm/evaluator.py +2 -2
- paddlex/modules/doc_vlm/exportor.py +2 -2
- paddlex/modules/doc_vlm/model_list.py +1 -1
- paddlex/modules/doc_vlm/trainer.py +2 -2
- paddlex/modules/face_recognition/evaluator.py +2 -2
- paddlex/modules/formula_recognition/evaluator.py +5 -2
- paddlex/modules/formula_recognition/model_list.py +3 -0
- paddlex/modules/formula_recognition/trainer.py +3 -0
- paddlex/modules/general_recognition/evaluator.py +1 -1
- paddlex/modules/image_classification/evaluator.py +2 -2
- paddlex/modules/image_classification/model_list.py +1 -0
- paddlex/modules/instance_segmentation/evaluator.py +1 -1
- paddlex/modules/keypoint_detection/evaluator.py +1 -1
- paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
- paddlex/modules/multilabel_classification/evaluator.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/object_detection/evaluator.py +2 -2
- paddlex/modules/object_detection/model_list.py +2 -0
- paddlex/modules/semantic_segmentation/evaluator.py +2 -2
- paddlex/modules/table_recognition/evaluator.py +2 -2
- paddlex/modules/text_detection/evaluator.py +2 -2
- paddlex/modules/text_detection/model_list.py +2 -0
- paddlex/modules/text_recognition/evaluator.py +2 -2
- paddlex/modules/text_recognition/model_list.py +2 -0
- paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/ts_classification/evaluator.py +2 -2
- paddlex/modules/ts_forecast/evaluator.py +2 -2
- paddlex/modules/video_classification/evaluator.py +2 -2
- paddlex/modules/video_detection/evaluator.py +2 -2
- paddlex/ops/__init__.py +2 -2
- paddlex/paddlex_cli.py +19 -13
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
- paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
- paddlex/repo_apis/base/config.py +1 -1
- paddlex/repo_manager/core.py +3 -3
- paddlex/repo_manager/meta.py +6 -2
- paddlex/repo_manager/repo.py +17 -16
- paddlex/utils/custom_device_list.py +26 -2
- paddlex/utils/deps.py +1 -1
- paddlex/utils/device.py +15 -8
- paddlex/utils/env.py +4 -0
- paddlex/utils/flags.py +2 -4
- paddlex/utils/fonts/__init__.py +34 -4
- paddlex/utils/misc.py +1 -1
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/METADATA +52 -56
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/RECORD +233 -206
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/licenses/LICENSE +0 -0
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
@@ -14,12 +14,15 @@
|
|
14
14
|
from __future__ import annotations
|
15
15
|
|
16
16
|
import copy
|
17
|
+
import math
|
17
18
|
import re
|
18
|
-
from
|
19
|
+
from functools import partial
|
20
|
+
from typing import List
|
19
21
|
|
20
22
|
import numpy as np
|
21
|
-
from PIL import Image, ImageDraw
|
23
|
+
from PIL import Image, ImageDraw, ImageFont
|
22
24
|
|
25
|
+
from ....utils.fonts import PINGFANG_FONT_FILE_PATH
|
23
26
|
from ...common.result import (
|
24
27
|
BaseCVResult,
|
25
28
|
HtmlMixin,
|
@@ -27,7 +30,166 @@ from ...common.result import (
|
|
27
30
|
MarkdownMixin,
|
28
31
|
XlsxMixin,
|
29
32
|
)
|
30
|
-
from .
|
33
|
+
from .setting import BLOCK_LABEL_MAP
|
34
|
+
|
35
|
+
|
36
|
+
def compile_title_pattern():
    """Compile the regex that splits leading numbering off a chapter title.

    The compiled pattern has three capturing groups:

    1. the numbering token (for example ``1.2.3``, ``(3)``, ``一、`` or ``IV.``),
    2. the whitespace between the numbering and the title text,
    3. the remaining title text.

    Returns:
        re.Pattern: Pattern anchored at the start of the string; leading
        whitespace before the numbering is allowed.
    """
    chinese_numerals = r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾"
    numbering_pattern = (
        r"(?:"
        # Dotted decimal numbering: 1, 1.2, 1.2.3, optionally followed by . or 、
        r"[1-9][0-9]*(?:\.[1-9][0-9]*)*[\.、]?|"
        # Parenthesised decimal or Chinese numbering. Both ASCII and
        # full-width parentheses are accepted.
        r"[\((](?:[1-9][0-9]*|[" + chinese_numerals + r"]+)[\))]|"
        # Bare Chinese numbering, optionally followed by 、 or .
        r"[" + chinese_numerals + r"]+[、\.]?|"
        # Roman numerals I-X. Alternatives are listed longest first because
        # regex alternation takes the first branch that lets the overall
        # match succeed; the lookahead stops an ordinary word such as
        # "Introduction" from being split into "I" + "ntroduction".
        r"(?:VIII|VII|VI|IV|IX|III|II|I|V|X)(?![A-Za-z])\.?"
        r")"
    )
    return re.compile(r"^\s*(" + numbering_pattern + r")(\s*)(.*)$")


# Compiled once at import time so every title shares the same pattern object.
TITLE_RE_PATTERN = compile_title_pattern()
|
48
|
+
|
49
|
+
|
50
|
+
def format_title_func(block):
    """Render a title block as a single-line Markdown heading.

    When ``TITLE_RE_PATTERN`` matches ``block.content``, the numbering and the
    title text are rejoined with exactly one space. Trailing ``.`` characters
    are then stripped, and the heading depth is derived from the number of
    ``.`` characters left in the title.

    NOTE(review): every ``.`` in the title is counted, including dots that
    belong to the title text rather than to the numbering.

    :param block: Object whose ``content`` attribute holds the title string.
    :return: Heading string with ``-\\n`` removed and ``\\n`` replaced by a space.
    """
    heading = block.content
    found = TITLE_RE_PATTERN.match(heading)
    if found:
        prefix = found.group(1).strip()
        remainder = found.group(3).lstrip()
        # Numbering and text separated by a single space.
        heading = f"{prefix} {remainder}"

    heading = heading.rstrip(".")
    depth = heading.count(".") + 1
    # One '#' is always present; `depth` more are added on top of it.
    hashes = "#" + "#" * depth
    joined = f"{hashes} {heading}".replace("-\n", "")
    return joined.replace("\n", " ")
|
81
|
+
|
82
|
+
|
83
|
+
def format_centered_by_html(string):
    """Wrap *string* in a centred ``<div>`` and collapse it to one line.

    ``-\\n`` sequences are removed and any remaining ``\\n`` becomes a space;
    a single trailing newline is appended to the result.
    """
    wrapped = f'<div style="text-align: center;">{string}</div>'
    single_line = wrapped.replace("-\n", "").replace("\n", " ")
    return single_line + "\n"
|
91
|
+
|
92
|
+
|
93
|
+
def format_text_plain_func(block):
    """Return the block's ``content`` attribute unchanged."""
    text = block.content
    return text
|
95
|
+
|
96
|
+
|
97
|
+
def format_image_scaled_by_html_func(block, original_image_width):
    """Build an ``<img>`` tag whose width is a percentage of the page width.

    Args:
        block: Object whose ``image`` mapping provides ``"path"`` (string) and
            ``"img"`` (object exposing a ``width`` attribute).
        original_image_width: Width the block image is measured against.

    Returns:
        str: The ``<img>`` tag; the percentage is truncated to an integer.
    """
    source = block.image["path"]
    block_width = block.image["img"].width
    percent = int(block_width / original_image_width * 100)
    # Remove hyphenated line breaks and flatten remaining newlines in the path.
    source = source.replace("-\n", "").replace("\n", " ")
    return f'<img src="{source}" alt="Image" width="{percent}%" />'
|
108
|
+
|
109
|
+
|
110
|
+
def format_image_plain_func(block):
    """Render an image block as a plain Markdown image reference.

    The previous template was the empty string, so ``"".format(path)`` always
    produced ``""`` and the image disappeared from the output. The template is
    now a Markdown image with empty alt text.

    Args:
        block: Object whose ``image`` mapping provides ``"path"`` (string).

    Returns:
        str: ``![](<path>)`` with ``-\\n`` removed from the path and any
        remaining ``\\n`` replaced by a space.
    """
    image_path = block.image["path"].replace("-\n", "").replace("\n", " ")
    return "![]({})".format(image_path)
|
115
|
+
|
116
|
+
|
117
|
+
def format_chart2table_func(block):
    """Convert ``|``-separated chart text into a Markdown pipe table.

    The first line of ``block.content`` is treated as the header: a ``---``
    separator row with the same number of ``|``-separated cells is inserted
    after it, and every row is wrapped in leading and trailing ``|``.
    """
    rows = block.content.split("\n")
    header_cells = rows[0].split("|")
    separator = "|".join("---" for _ in header_cells)
    rows.insert(1, separator)
    return "\n".join("|" + row + "|" for row in rows)
|
123
|
+
|
124
|
+
|
125
|
+
def simplify_table_func(table_code):
    """Strip the ``<html>``/``<body>`` wrapper tags from table markup.

    The tags are removed wherever they occur, and a newline is prepended to
    the result.
    """
    stripped = table_code
    for tag in ("<html>", "</html>", "<body>", "</body>"):
        stripped = stripped.replace(tag, "")
    return "\n" + stripped
|
129
|
+
|
130
|
+
|
131
|
+
def format_first_line_func(block, templates, format_func, spliter):
    """Apply *format_func* to the first non-blank piece if it is a template.

    ``block.content`` is split on *spliter*. Blank pieces are skipped; the
    first non-blank piece is replaced by ``format_func(piece)`` when its
    lower-cased form is in *templates*. The pieces are then rejoined with
    *spliter*.

    NOTE(review): the nesting of ``break`` was reconstructed from a listing
    without indentation; it is placed at loop level so that only the first
    non-blank piece is examined - confirm against the upstream file.
    """
    pieces = block.content.split(spliter)
    for position, piece in enumerate(pieces):
        if piece.strip() == "":
            continue
        if piece.lower() in templates:
            pieces[position] = format_func(piece)
        break
    return spliter.join(pieces)
|
141
|
+
|
142
|
+
|
143
|
+
def get_seg_flag(block: LayoutParsingBlock, prev_block: LayoutParsingBlock):
    """Decide whether *block* starts and/or ends a segment.

    Both flags default to True. The start flag is cleared when the block's
    text begins within 10 units of the left context edge and, if a previous
    block exists, that block's text also ends within 10 units of the right
    context edge, has more than one line, and lies closer than the wider of
    the two blocks. The end flag is cleared when the block's text ends within
    10 units of the right context edge.

    Args:
        block: Current block; reads ``bbox``, ``seg_start_coordinate``,
            ``seg_end_coordinate`` and (when a previous block exists) ``width``.
        prev_block: Preceding block or None; reads ``bbox``, ``num_of_lines``,
            ``seg_end_coordinate`` and ``width``.

    Returns:
        tuple[bool, bool]: ``(seg_start_flag, seg_end_flag)``.
    """
    box = block.bbox
    left_edge = box[0]
    right_edge = box[2]
    text_start = block.seg_start_coordinate
    text_end = block.seg_end_coordinate

    if prev_block is None:
        # No predecessor: only the block's own left indentation matters.
        starts_segment = not (text_start - left_edge < 10)
    else:
        prev_box = prev_block.bbox
        prev_text_end = prev_block.seg_end_coordinate
        prev_is_multiline = prev_block.num_of_lines > 1

        if left_edge < prev_box[2]:
            # Horizontally overlapping blocks share a widened context span.
            left_edge = min(prev_box[0], left_edge)
            right_edge = max(prev_box[2], right_edge)
            prev_reaches_edge = abs(right_edge - prev_text_end) < 10
            gap = 0
        else:
            prev_reaches_edge = abs(prev_box[2] - prev_text_end) < 10
            gap = abs(box[0] - prev_box[2])

        starts_at_edge = text_start - left_edge < 10

        continues_previous = (
            prev_reaches_edge
            and starts_at_edge
            and prev_is_multiline
            and gap < max(prev_block.width, block.width)
        )
        starts_segment = not continues_previous

    ends_segment = not (right_edge - text_end < 10)
    return starts_segment, ends_segment
|
31
193
|
|
32
194
|
|
33
195
|
class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
@@ -40,30 +202,10 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
40
202
|
XlsxMixin.__init__(self)
|
41
203
|
MarkdownMixin.__init__(self)
|
42
204
|
JsonMixin.__init__(self)
|
43
|
-
self.title_pattern = self._build_title_pattern()
|
44
|
-
|
45
|
-
def _build_title_pattern(self):
|
46
|
-
# Precompiled regex pattern for matching numbering at the beginning of the title
|
47
|
-
numbering_pattern = (
|
48
|
-
r"(?:"
|
49
|
-
+ r"[1-9][0-9]*(?:\.[1-9][0-9]*)*[\.、]?|"
|
50
|
-
+ r"[\(\(](?:[1-9][0-9]*|["
|
51
|
-
r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+)[\)\)]|" + r"["
|
52
|
-
r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+"
|
53
|
-
r"[、\.]?|" + r"(?:I|II|III|IV|V|VI|VII|VIII|IX|X)\.?" + r")"
|
54
|
-
)
|
55
|
-
return re.compile(r"^\s*(" + numbering_pattern + r")(\s*)(.*)$")
|
56
|
-
|
57
|
-
def _get_input_fn(self):
|
58
|
-
fn = super()._get_input_fn()
|
59
|
-
if (page_idx := self["page_index"]) is not None:
|
60
|
-
fp = Path(fn)
|
61
|
-
stem, suffix = fp.stem, fp.suffix
|
62
|
-
return f"{stem}_{page_idx}{suffix}"
|
63
|
-
else:
|
64
|
-
return fn
|
65
205
|
|
66
206
|
def _to_img(self) -> dict[str, np.ndarray]:
|
207
|
+
from .utils import get_show_color
|
208
|
+
|
67
209
|
res_img_dict = {}
|
68
210
|
model_settings = self["model_settings"]
|
69
211
|
if model_settings["use_doc_preprocessor"]:
|
@@ -71,12 +213,14 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
71
213
|
res_img_dict[key] = value
|
72
214
|
res_img_dict["layout_det_res"] = self["layout_det_res"].img["res"]
|
73
215
|
|
74
|
-
if model_settings["
|
75
|
-
res_img_dict["
|
216
|
+
if model_settings["use_region_detection"]:
|
217
|
+
res_img_dict["region_det_res"] = self["region_det_res"].img["res"]
|
218
|
+
|
219
|
+
res_img_dict["overall_ocr_res"] = self["overall_ocr_res"].img["ocr_res_img"]
|
76
220
|
|
77
221
|
if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
|
78
222
|
table_cell_img = Image.fromarray(
|
79
|
-
copy.deepcopy(self["doc_preprocessor_res"]["output_img"])
|
223
|
+
copy.deepcopy(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
|
80
224
|
)
|
81
225
|
table_draw = ImageDraw.Draw(table_cell_img)
|
82
226
|
rectangle_color = (255, 0, 0)
|
@@ -101,16 +245,23 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
101
245
|
# for layout ordering image
|
102
246
|
image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
|
103
247
|
draw = ImageDraw.Draw(image, "RGBA")
|
104
|
-
|
248
|
+
font_size = int(0.018 * int(image.width)) + 2
|
249
|
+
font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
|
250
|
+
parsing_result: List[LayoutParsingBlock] = self["parsing_res_list"]
|
105
251
|
for block in parsing_result:
|
106
|
-
bbox = block
|
107
|
-
index = block.
|
108
|
-
label = block
|
109
|
-
fill_color = get_show_color(label)
|
252
|
+
bbox = block.bbox
|
253
|
+
index = block.order_index
|
254
|
+
label = block.label
|
255
|
+
fill_color = get_show_color(label, False)
|
110
256
|
draw.rectangle(bbox, fill=fill_color)
|
111
257
|
if index is not None:
|
112
|
-
text_position = (bbox[2] + 2, bbox[1] -
|
113
|
-
|
258
|
+
text_position = (bbox[2] + 2, bbox[1] - font_size // 2)
|
259
|
+
if int(image.width) - bbox[2] < font_size:
|
260
|
+
text_position = (
|
261
|
+
int(bbox[2] - font_size * 1.1),
|
262
|
+
bbox[1] - font_size // 2,
|
263
|
+
)
|
264
|
+
draw.text(text_position, str(index), font=font, fill="red")
|
114
265
|
|
115
266
|
res_img_dict["layout_order_res"] = image
|
116
267
|
|
@@ -134,8 +285,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
134
285
|
if self["model_settings"]["use_doc_preprocessor"]:
|
135
286
|
data["doc_preprocessor_res"] = self["doc_preprocessor_res"].str["res"]
|
136
287
|
data["layout_det_res"] = self["layout_det_res"].str["res"]
|
137
|
-
|
138
|
-
data["overall_ocr_res"] = self["overall_ocr_res"].str["res"]
|
288
|
+
data["overall_ocr_res"] = self["overall_ocr_res"].str["res"]
|
139
289
|
if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
|
140
290
|
data["table_res_list"] = []
|
141
291
|
for sno in range(len(self["table_res_list"])):
|
@@ -176,9 +326,9 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
176
326
|
parsing_res_list = self["parsing_res_list"]
|
177
327
|
parsing_res_list = [
|
178
328
|
{
|
179
|
-
"block_label": parsing_res
|
180
|
-
"block_content": parsing_res
|
181
|
-
"block_bbox": parsing_res
|
329
|
+
"block_label": parsing_res.label,
|
330
|
+
"block_content": parsing_res.content,
|
331
|
+
"block_bbox": parsing_res.bbox,
|
182
332
|
}
|
183
333
|
for parsing_res in parsing_res_list
|
184
334
|
]
|
@@ -186,8 +336,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
186
336
|
if self["model_settings"]["use_doc_preprocessor"]:
|
187
337
|
data["doc_preprocessor_res"] = self["doc_preprocessor_res"].json["res"]
|
188
338
|
data["layout_det_res"] = self["layout_det_res"].json["res"]
|
189
|
-
|
190
|
-
data["overall_ocr_res"] = self["overall_ocr_res"].json["res"]
|
339
|
+
data["overall_ocr_res"] = self["overall_ocr_res"].json["res"]
|
191
340
|
if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
|
192
341
|
data["table_res_list"] = []
|
193
342
|
for sno in range(len(self["table_res_list"])):
|
@@ -240,228 +389,357 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
240
389
|
res_xlsx_dict[key] = table_res.xlsx["pred"]
|
241
390
|
return res_xlsx_dict
|
242
391
|
|
243
|
-
def _to_markdown(self) -> dict:
|
392
|
+
def _to_markdown(self, pretty=True) -> dict:
|
244
393
|
"""
|
245
394
|
Save the parsing result to a Markdown file.
|
246
395
|
|
396
|
+
Args:
|
397
|
+
pretty (Optional[bool]): whether to pretty markdown by HTML, default by True.
|
398
|
+
|
247
399
|
Returns:
|
248
400
|
Dict
|
249
401
|
"""
|
402
|
+
original_image_width = self["doc_preprocessor_res"]["output_img"].shape[1]
|
250
403
|
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
:param title: Original chapter title string.
|
261
|
-
:return: Normalized chapter title string.
|
262
|
-
"""
|
263
|
-
match = self.title_pattern.match(title)
|
264
|
-
if match:
|
265
|
-
numbering = match.group(1).strip()
|
266
|
-
title_content = match.group(3).lstrip()
|
267
|
-
# Return numbering and title content separated by one space
|
268
|
-
title = numbering + " " + title_content
|
269
|
-
|
270
|
-
title = title.rstrip(".")
|
271
|
-
level = (
|
272
|
-
title.count(
|
273
|
-
".",
|
274
|
-
)
|
275
|
-
+ 1
|
276
|
-
if "." in title
|
277
|
-
else 1
|
278
|
-
)
|
279
|
-
return f"#{'#' * level} {title}".replace("-\n", "").replace(
|
280
|
-
"\n",
|
281
|
-
" ",
|
282
|
-
)
|
283
|
-
|
284
|
-
def format_centered_text(key):
|
285
|
-
return (
|
286
|
-
f'<div style="text-align: center;">{block[key]}</div>'.replace(
|
287
|
-
"-\n",
|
288
|
-
"",
|
289
|
-
).replace("\n", " ")
|
290
|
-
+ "\n"
|
404
|
+
if pretty:
|
405
|
+
format_text_func = lambda block: format_centered_by_html(
|
406
|
+
format_text_plain_func(block)
|
407
|
+
)
|
408
|
+
format_image_func = lambda block: format_centered_by_html(
|
409
|
+
format_image_scaled_by_html_func(
|
410
|
+
block,
|
411
|
+
original_image_width=original_image_width,
|
291
412
|
)
|
413
|
+
)
|
414
|
+
else:
|
415
|
+
format_text_func = lambda block: block.content
|
416
|
+
format_image_func = format_image_plain_func
|
292
417
|
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
'<div style="text-align: center;"><img src="{}" alt="Image" /></div>'.format(
|
298
|
-
image_path.replace("-\n", "").replace("\n", " "),
|
299
|
-
),
|
300
|
-
)
|
301
|
-
return "\n".join(img_tags)
|
302
|
-
|
303
|
-
def format_first_line(templates, format_func, spliter):
|
304
|
-
lines = block["block_content"].split(spliter)
|
305
|
-
for idx in range(len(lines)):
|
306
|
-
line = lines[idx]
|
307
|
-
if line.strip() == "":
|
308
|
-
continue
|
309
|
-
if line.lower() in templates:
|
310
|
-
lines[idx] = format_func(line)
|
311
|
-
break
|
312
|
-
return spliter.join(lines)
|
313
|
-
|
314
|
-
def format_table():
|
315
|
-
return "\n" + block["block_content"]
|
316
|
-
|
317
|
-
def get_seg_flag(block, prev_block):
|
318
|
-
|
319
|
-
seg_start_flag = True
|
320
|
-
seg_end_flag = True
|
321
|
-
|
322
|
-
block_box = block["block_bbox"]
|
323
|
-
context_left_coordinate = block_box[0]
|
324
|
-
context_right_coordinate = block_box[2]
|
325
|
-
seg_start_coordinate = block.get("seg_start_coordinate")
|
326
|
-
seg_end_coordinate = block.get("seg_end_coordinate")
|
327
|
-
|
328
|
-
if prev_block is not None:
|
329
|
-
prev_block_bbox = prev_block["block_bbox"]
|
330
|
-
num_of_prev_lines = prev_block.get("num_of_lines")
|
331
|
-
pre_block_seg_end_coordinate = prev_block.get("seg_end_coordinate")
|
332
|
-
prev_end_space_small = (
|
333
|
-
context_right_coordinate - pre_block_seg_end_coordinate < 10
|
334
|
-
)
|
335
|
-
prev_lines_more_than_one = num_of_prev_lines > 1
|
336
|
-
|
337
|
-
overlap_blocks = context_left_coordinate < prev_block_bbox[2]
|
338
|
-
|
339
|
-
# update context_left_coordinate and context_right_coordinate
|
340
|
-
if overlap_blocks:
|
341
|
-
context_left_coordinate = min(
|
342
|
-
prev_block_bbox[0], context_left_coordinate
|
343
|
-
)
|
344
|
-
context_right_coordinate = max(
|
345
|
-
prev_block_bbox[2], context_right_coordinate
|
346
|
-
)
|
347
|
-
prev_end_space_small = (
|
348
|
-
prev_block_bbox[2] - pre_block_seg_end_coordinate < 10
|
349
|
-
)
|
350
|
-
|
351
|
-
current_start_space_small = (
|
352
|
-
seg_start_coordinate - context_left_coordinate < 10
|
353
|
-
)
|
418
|
+
if self["model_settings"].get("use_chart_recognition", False):
|
419
|
+
format_chart_func = format_chart2table_func
|
420
|
+
else:
|
421
|
+
format_chart_func = format_image_func
|
354
422
|
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
and prev_lines_more_than_one
|
359
|
-
):
|
360
|
-
seg_start_flag = False
|
361
|
-
else:
|
362
|
-
if seg_start_coordinate - context_left_coordinate < 10:
|
363
|
-
seg_start_flag = False
|
364
|
-
|
365
|
-
if context_right_coordinate - seg_end_coordinate < 10:
|
366
|
-
seg_end_flag = False
|
367
|
-
|
368
|
-
return seg_start_flag, seg_end_flag
|
369
|
-
|
370
|
-
handlers = {
|
371
|
-
"paragraph_title": lambda: format_title(block["block_content"]),
|
372
|
-
"doc_title": lambda: f"# {block['block_content']}".replace(
|
373
|
-
"-\n",
|
374
|
-
"",
|
375
|
-
).replace("\n", " "),
|
376
|
-
"table_title": lambda: format_centered_text("block_content"),
|
377
|
-
"figure_title": lambda: format_centered_text("block_content"),
|
378
|
-
"chart_title": lambda: format_centered_text("block_content"),
|
379
|
-
"text": lambda: block["block_content"]
|
380
|
-
.replace("-\n", " ")
|
381
|
-
.replace("\n", " "),
|
382
|
-
"abstract": lambda: format_first_line(
|
383
|
-
["摘要", "abstract"], lambda l: f"## {l}\n", " "
|
384
|
-
),
|
385
|
-
"content": lambda: block["block_content"]
|
386
|
-
.replace("-\n", " \n")
|
387
|
-
.replace("\n", " \n"),
|
388
|
-
"image": lambda: format_image("block_image"),
|
389
|
-
"chart": lambda: format_image("block_image"),
|
390
|
-
"formula": lambda: f"$${block['block_content']}$$",
|
391
|
-
"table": format_table,
|
392
|
-
"reference": lambda: format_first_line(
|
393
|
-
["参考文献", "references"], lambda l: f"## {l}", "\n"
|
394
|
-
),
|
395
|
-
"algorithm": lambda: block["block_content"].strip("\n"),
|
396
|
-
"seal": lambda: f"Words of Seals:\n{block['block_content']}",
|
397
|
-
}
|
398
|
-
parsing_res_list = obj["parsing_res_list"]
|
399
|
-
markdown_content = ""
|
400
|
-
last_label = None
|
401
|
-
seg_start_flag = None
|
402
|
-
seg_end_flag = None
|
403
|
-
prev_block = None
|
404
|
-
page_first_element_seg_start_flag = None
|
405
|
-
page_last_element_seg_end_flag = None
|
406
|
-
parsing_res_list = sorted(
|
407
|
-
parsing_res_list,
|
408
|
-
key=lambda x: x.get("sub_index", 999),
|
423
|
+
if self["model_settings"].get("use_seal_recognition", False):
|
424
|
+
format_seal_func = lambda block: "\n".join(
|
425
|
+
[format_image_func(block), format_text_func(block)]
|
409
426
|
)
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
427
|
+
else:
|
428
|
+
format_seal_func = format_image_func
|
429
|
+
|
430
|
+
if self["model_settings"].get("use_table_recognition", False):
|
431
|
+
if pretty:
|
432
|
+
format_table_func = lambda block: "\n" + format_text_func(
|
433
|
+
block
|
434
|
+
).replace("<table>", '<table border="1">')
|
435
|
+
else:
|
436
|
+
format_table_func = lambda block: simplify_table_func(
|
437
|
+
"\n" + block.content
|
418
438
|
)
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
439
|
+
else:
|
440
|
+
format_table_func = format_image_func
|
441
|
+
|
442
|
+
if self["model_settings"].get("use_formula_recognition", False):
|
443
|
+
format_formula_func = lambda block: f"$${block.content}$$"
|
444
|
+
else:
|
445
|
+
format_formula_func = format_image_func
|
446
|
+
|
447
|
+
handle_funcs_dict = {
|
448
|
+
"paragraph_title": format_title_func,
|
449
|
+
"abstract_title": format_title_func,
|
450
|
+
"reference_title": format_title_func,
|
451
|
+
"content_title": format_title_func,
|
452
|
+
"doc_title": lambda block: f"# {block.content}".replace(
|
453
|
+
"-\n",
|
454
|
+
"",
|
455
|
+
).replace("\n", " "),
|
456
|
+
"table_title": format_text_func,
|
457
|
+
"figure_title": format_text_func,
|
458
|
+
"chart_title": format_text_func,
|
459
|
+
"text": lambda block: block.content.replace("\n\n", "\n").replace(
|
460
|
+
"\n", "\n\n"
|
461
|
+
),
|
462
|
+
"abstract": partial(
|
463
|
+
format_first_line_func,
|
464
|
+
templates=["摘要", "abstract"],
|
465
|
+
format_func=lambda l: f"## {l}\n",
|
466
|
+
spliter=" ",
|
467
|
+
),
|
468
|
+
"content": lambda block: block.content.replace("-\n", " \n").replace(
|
469
|
+
"\n", " \n"
|
470
|
+
),
|
471
|
+
"image": format_image_func,
|
472
|
+
"chart": format_chart_func,
|
473
|
+
"formula": format_formula_func,
|
474
|
+
"table": format_table_func,
|
475
|
+
"reference": partial(
|
476
|
+
format_first_line_func,
|
477
|
+
templates=["参考文献", "references"],
|
478
|
+
format_func=lambda l: f"## {l}",
|
479
|
+
spliter="\n",
|
480
|
+
),
|
481
|
+
"algorithm": lambda block: block.content.strip("\n"),
|
482
|
+
"seal": format_seal_func,
|
483
|
+
}
|
484
|
+
|
485
|
+
markdown_content = ""
|
486
|
+
last_label = None
|
487
|
+
seg_start_flag = None
|
488
|
+
seg_end_flag = None
|
489
|
+
prev_block = None
|
490
|
+
page_first_element_seg_start_flag = None
|
491
|
+
page_last_element_seg_end_flag = None
|
492
|
+
markdown_info = {}
|
493
|
+
markdown_info["markdown_images"] = {}
|
494
|
+
for block in self["parsing_res_list"]:
|
495
|
+
seg_start_flag, seg_end_flag = get_seg_flag(block, prev_block)
|
496
|
+
|
497
|
+
label = block.label
|
498
|
+
if block.image is not None:
|
499
|
+
markdown_info["markdown_images"][block.image["path"]] = block.image[
|
500
|
+
"img"
|
501
|
+
]
|
502
|
+
page_first_element_seg_start_flag = (
|
503
|
+
seg_start_flag
|
504
|
+
if (page_first_element_seg_start_flag is None)
|
505
|
+
else page_first_element_seg_start_flag
|
451
506
|
)
|
452
507
|
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
508
|
+
handle_func = handle_funcs_dict.get(label, None)
|
509
|
+
if handle_func:
|
510
|
+
prev_block = block
|
511
|
+
if label == last_label == "text" and seg_start_flag == False:
|
512
|
+
markdown_content += handle_func(block)
|
513
|
+
else:
|
514
|
+
markdown_content += (
|
515
|
+
"\n\n" + handle_func(block)
|
516
|
+
if markdown_content
|
517
|
+
else handle_func(block)
|
518
|
+
)
|
519
|
+
last_label = label
|
520
|
+
page_last_element_seg_end_flag = seg_end_flag
|
521
|
+
|
522
|
+
markdown_info["markdown_texts"] = markdown_content
|
458
523
|
markdown_info["page_continuation_flags"] = (
|
459
524
|
page_first_element_seg_start_flag,
|
460
525
|
page_last_element_seg_end_flag,
|
461
526
|
)
|
462
|
-
|
463
|
-
markdown_info["markdown_images"] = {}
|
464
527
|
for img in self["imgs_in_doc"]:
|
465
528
|
markdown_info["markdown_images"][img["path"]] = img["img"]
|
466
529
|
|
467
530
|
return markdown_info
|
531
|
+
|
532
|
+
|
533
|
+
class LayoutParsingBlock:
    """One layout block: a label, a bounding box, its content and ordering metadata.

    The bounding box is stored as a list ``[x1, y1, x2, y2]`` of ints. A block
    can temporarily absorb other blocks through :meth:`append_child_block`,
    which grows ``bbox`` to the union box; :meth:`get_child_blocks` releases
    them again and restores the box saved before the first child was added.
    """

    def __init__(self, label, bbox, content="") -> None:
        """Create a block.

        Args:
            label: Layout label of the block.
            bbox: Four coordinates ``[x1, y1, x2, y2]``; stored truncated to int.
            content: Content of the block. Defaults to an empty string.
        """
        self.label = label
        self.order_label = None
        self.bbox = list(map(int, bbox))
        self.content = content
        self.seg_start_coordinate = float("inf")
        self.seg_end_coordinate = float("-inf")
        # Width/height are computed from the raw ``bbox`` argument, not from
        # the int-truncated ``self.bbox``.
        self.width = bbox[2] - bbox[0]
        self.height = bbox[3] - bbox[1]
        self.area = self.width * self.height
        self.num_of_lines = 1
        self.image = None
        self.index = None
        self.order_index = None
        self.text_line_width = 1
        self.text_line_height = 1
        self.direction = self.get_bbox_direction()
        self.child_blocks = []
        self.update_direction_info()

    def __str__(self) -> str:
        """Return the string form of the instance attribute dictionary."""
        return f"{self.__dict__}"

    def __repr__(self) -> str:
        """Return a multi-line debug banner with index, label, region label, bbox and content."""
        return (
            "\n\n#################\n"
            f"index:\t{self.index}\n"
            f"label:\t{self.label}\n"
            f"region_label:\t{self.order_label}\n"
            f"bbox:\t{self.bbox}\n"
            f"content:\t{self.content}\n"
            "#################"
        )

    def to_dict(self) -> dict:
        """Return the instance attribute dictionary (the live ``__dict__``, not a copy)."""
        return self.__dict__

    def update_direction_info(self) -> None:
        """Refresh the direction-dependent attributes from ``direction`` and ``bbox``.

        For a horizontal block the primary axis is x (``bbox[0]``..``bbox[2]``)
        and the secondary axis is y; for any other direction the axes are
        swapped. ``short_side_length``/``long_side_length`` are taken from the
        stored ``width``/``height``.
        """
        if self.direction == "horizontal":
            self.secondary_direction = "vertical"
            self.short_side_length = self.height
            self.long_side_length = self.width
            self.start_coordinate = self.bbox[0]
            self.end_coordinate = self.bbox[2]
            self.secondary_direction_start_coordinate = self.bbox[1]
            self.secondary_direction_end_coordinate = self.bbox[3]
        else:
            self.secondary_direction = "horizontal"
            self.short_side_length = self.width
            self.long_side_length = self.height
            self.start_coordinate = self.bbox[1]
            self.end_coordinate = self.bbox[3]
            self.secondary_direction_start_coordinate = self.bbox[0]
            self.secondary_direction_end_coordinate = self.bbox[2]

    def append_child_block(self, child_block: "LayoutParsingBlock") -> None:
        """Absorb ``child_block`` and grow ``bbox`` to the union of both boxes.

        The box held before the first child is appended is saved in
        ``ori_bbox``. If the child already has children of its own, they are
        taken over as well (which resets the child via ``get_child_blocks``).

        Args:
            child_block: The block to absorb.
        """
        if not self.child_blocks:
            # Remember the pre-merge box so get_child_blocks() can restore it.
            self.ori_bbox = list(self.bbox)
        x1, y1, x2, y2 = self.bbox
        x1_child, y1_child, x2_child, y2_child = child_block.bbox
        # Keep bbox a list, the same type __init__ stores, instead of a tuple.
        self.bbox = [
            min(x1, x1_child),
            min(y1, y1_child),
            max(x2, x2_child),
            max(y2, y2_child),
        ]
        # NOTE(review): width/height/direction are not recomputed for the
        # union box here; only the coordinate attributes are refreshed.
        self.update_direction_info()
        child_blocks = [child_block]
        if child_block.child_blocks:
            child_blocks.extend(child_block.get_child_blocks())
        self.child_blocks.extend(child_blocks)

    def get_child_blocks(self) -> list:
        """Release and return the absorbed child blocks, restoring the saved bbox.

        Returns:
            list: The child blocks held so far; empty if none were appended.
        """
        # ``ori_bbox`` only exists once a child has been appended; without one
        # the current bbox is already the original, so keep it instead of
        # raising AttributeError.
        self.bbox = getattr(self, "ori_bbox", self.bbox)
        child_blocks = self.child_blocks.copy()
        self.child_blocks = []
        return child_blocks

    def get_centroid(self) -> tuple:
        """Return the ``(x, y)`` centre point of ``bbox``."""
        x1, y1, x2, y2 = self.bbox
        return ((x1 + x2) / 2, (y1 + y2) / 2)

    def get_bbox_direction(self, direction_ratio: float = 1.0) -> str:
        """Determine whether the block is horizontal or vertical.

        Args:
            direction_ratio (float): Factor applied to the width before it is
                compared with the height. Default is 1.0.

        Returns:
            str: "horizontal" if ``width * direction_ratio >= height``,
            otherwise "vertical".
        """
        return (
            "horizontal" if self.width * direction_ratio >= self.height else "vertical"
        )
|
626
|
+
|
627
|
+
|
628
|
+
class LayoutParsingRegion:
    """A rectangular region holding layout blocks grouped by label category.

    On construction the blocks are indexed into ``block_map`` and their
    indexes are sorted into per-category lists (headers, titles, vision
    blocks, footers, unordered blocks, normal text). The region direction and
    the average text-line size are derived from the normal-text blocks.
    """

    def __init__(
        self,
        bbox,
        blocks: "List[LayoutParsingBlock] | None" = None,
        image_shape=None,
    ) -> None:
        """Create a region.

        Args:
            bbox: Region box ``[x1, y1, x2, y2]``.
            blocks: Blocks belonging to the region. ``None`` (the default) is
                treated as no blocks.
            image_shape: ``(height, width)`` of the page image. When ``None``
                the region's own bottom/right edges are used instead.
        """
        # A ``None`` default replaces the former shared mutable ``[]`` default.
        if blocks is None:
            blocks = []
        self.bbox = bbox
        self.block_map = {}
        self.direction = "horizontal"
        self.calculate_bbox_metrics(image_shape)
        self.doc_title_block_idxes = []
        self.paragraph_title_block_idxes = []
        self.vision_block_idxes = []
        self.unordered_block_idxes = []
        self.vision_title_block_idxes = []
        self.normal_text_block_idxes = []
        self.header_block_idxes = []
        self.footer_block_idxes = []
        self.text_line_width = 20
        self.text_line_height = 10
        self.init_region_info_from_layout(blocks)
        self.init_direction_info()

    def init_region_info_from_layout(self, blocks: "List[LayoutParsingBlock]"):
        """Index ``blocks`` and derive the region direction and text-line size.

        Each block gets its position in ``blocks`` as ``index`` and is filed
        under the first matching category of ``BLOCK_LABEL_MAP``; blocks that
        match none are treated as normal text. The region is "horizontal" when
        at least half of the normal-text blocks are horizontal. Text-line
        width/height become the mean over the normal-text blocks, falling back
        to 20/10 when there are none.

        Args:
            blocks: The blocks to register in this region.
        """
        horizontal_normal_text_block_num = 0
        text_line_height_list = []
        text_line_width_list = []
        for idx, block in enumerate(blocks):
            self.block_map[idx] = block
            block.index = idx
            if block.label in BLOCK_LABEL_MAP["header_labels"]:
                self.header_block_idxes.append(idx)
            elif block.label in BLOCK_LABEL_MAP["doc_title_labels"]:
                self.doc_title_block_idxes.append(idx)
            elif block.label in BLOCK_LABEL_MAP["paragraph_title_labels"]:
                self.paragraph_title_block_idxes.append(idx)
            elif block.label in BLOCK_LABEL_MAP["vision_labels"]:
                self.vision_block_idxes.append(idx)
            elif block.label in BLOCK_LABEL_MAP["vision_title_labels"]:
                self.vision_title_block_idxes.append(idx)
            elif block.label in BLOCK_LABEL_MAP["footer_labels"]:
                self.footer_block_idxes.append(idx)
            elif block.label in BLOCK_LABEL_MAP["unordered_labels"]:
                self.unordered_block_idxes.append(idx)
            else:
                self.normal_text_block_idxes.append(idx)
                text_line_height_list.append(block.text_line_height)
                text_line_width_list.append(block.text_line_width)
                if block.direction == "horizontal":
                    horizontal_normal_text_block_num += 1
        self.direction = (
            "horizontal"
            if horizontal_normal_text_block_num
            >= len(self.normal_text_block_idxes) * 0.5
            else "vertical"
        )
        self.text_line_width = (
            np.mean(text_line_width_list) if text_line_width_list else 20
        )
        self.text_line_height = (
            np.mean(text_line_height_list) if text_line_height_list else 10
        )

    def init_direction_info(self):
        """Set the bbox indexes of the primary/secondary axes and their centres.

        For a horizontal region the primary axis uses bbox indexes 0 and 2 and
        the secondary axis 1 and 3; otherwise the two are swapped.
        """
        if self.direction == "horizontal":
            self.direction_start_index = 0
            self.direction_end_index = 2
            self.secondary_direction_start_index = 1
            self.secondary_direction_end_index = 3
            self.secondary_direction = "vertical"
        else:
            self.direction_start_index = 1
            self.direction_end_index = 3
            self.secondary_direction_start_index = 0
            self.secondary_direction_end_index = 2
            self.secondary_direction = "horizontal"

        self.direction_center_coordinate = (
            self.bbox[self.direction_start_index] + self.bbox[self.direction_end_index]
        ) / 2
        self.secondary_direction_center_coordinate = (
            self.bbox[self.secondary_direction_start_index]
            + self.bbox[self.secondary_direction_end_index]
        ) / 2

    def calculate_bbox_metrics(self, image_shape):
        """Compute distance and angle metrics of the region box.

        Sets ``euclidean_distance`` (top-left corner to origin),
        ``center_euclidean_distance`` and ``angle_rad`` (box centre relative
        to the origin) and ``weighted_distance``.

        Args:
            image_shape: ``(height, width)`` of the page image, or ``None`` to
                fall back to the region's own ``(y2, x2)``.
        """
        x1, y1, x2, y2 = self.bbox
        if image_shape is None:
            # The signature advertises ``None`` as a default; fall back to the
            # region's own extent instead of failing on tuple unpacking.
            image_shape = (y2, x2)
        _, image_width = image_shape
        width = x2 - x1
        x_center, y_center = (x1 + x2) / 2, (y1 + y2) / 2
        self.euclidean_distance = math.sqrt(x1**2 + y1**2)
        self.center_euclidean_distance = math.sqrt(x_center**2 + y_center**2)
        self.angle_rad = math.atan2(y_center, x_center)
        # x1 is snapped down to a multiple of one tenth of the image width.
        # Clamp the step to 1 so images narrower than 10 px do not divide by zero.
        column_step = max(image_width // 10, 1)
        self.weighted_distance = y2 + width + (x1 // column_step) * column_step * 1.5

    def sort_normal_blocks(self, blocks):
        """Sort ``blocks`` in place by their ``bbox``, according to the region direction.

        Horizontal regions sort by top coordinate, then left coordinate, each
        bucketed by the text-line height/width, then by squared distance of
        the top-left corner from the origin. Other regions sort by negated
        left coordinate first, then top coordinate.

        Args:
            blocks: List of objects exposing a ``bbox`` sequence.
        """
        if self.direction == "horizontal":
            blocks.sort(
                key=lambda x: (
                    x.bbox[1] // self.text_line_height,
                    x.bbox[0] // self.text_line_width,
                    x.bbox[1] ** 2 + x.bbox[0] ** 2,
                ),
            )
        else:
            blocks.sort(
                key=lambda x: (
                    # Unary minus binds tighter than ``//``: this floors the
                    # negated coordinate, i.e. (-x0) // width.
                    -x.bbox[0] // self.text_line_width,
                    x.bbox[1] // self.text_line_height,
                    -(x.bbox[2] ** 2 + x.bbox[1] ** 2),
                ),
            )

    def sort(self):
        """Run ``xycut_enhanced`` on this region and return its result."""
        # Imported inside the method, as in the original code.
        from .xycut_enhanced import xycut_enhanced

        return xycut_enhanced(self)
|