paddlex 3.0.0rc1__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +1 -1
- paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
- paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
- paddlex/configs/pipelines/OCR.yaml +7 -6
- paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
- paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
- paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
- paddlex/configs/pipelines/doc_understanding.yaml +1 -1
- paddlex/configs/pipelines/formula_recognition.yaml +2 -2
- paddlex/configs/pipelines/layout_parsing.yaml +3 -2
- paddlex/configs/pipelines/seal_recognition.yaml +1 -0
- paddlex/configs/pipelines/table_recognition.yaml +2 -1
- paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
- paddlex/hpip_links.html +20 -20
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
- paddlex/inference/common/result/mixin.py +19 -12
- paddlex/inference/models/base/predictor/base_predictor.py +2 -8
- paddlex/inference/models/common/static_infer.py +11 -59
- paddlex/inference/models/common/tokenizer/__init__.py +2 -0
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
- paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
- paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
- paddlex/inference/models/common/tokenizer/vocab.py +7 -7
- paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
- paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
- paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
- paddlex/inference/models/common/vlm/generation/utils.py +1 -1
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
- paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
- paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
- paddlex/inference/models/doc_vlm/predictor.py +79 -24
- paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
- paddlex/inference/models/doc_vlm/processors/common.py +189 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
- paddlex/inference/models/formula_recognition/predictor.py +7 -1
- paddlex/inference/models/formula_recognition/processors.py +92 -79
- paddlex/inference/models/formula_recognition/result.py +28 -27
- paddlex/inference/models/image_feature/processors.py +3 -4
- paddlex/inference/models/keypoint_detection/predictor.py +3 -0
- paddlex/inference/models/object_detection/predictor.py +2 -0
- paddlex/inference/models/object_detection/processors.py +28 -3
- paddlex/inference/models/object_detection/utils.py +2 -0
- paddlex/inference/models/table_structure_recognition/result.py +0 -10
- paddlex/inference/models/text_detection/predictor.py +8 -0
- paddlex/inference/models/text_detection/processors.py +44 -10
- paddlex/inference/models/text_detection/result.py +0 -10
- paddlex/inference/pipelines/__init__.py +9 -5
- paddlex/inference/pipelines/_parallel.py +172 -0
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
- paddlex/inference/pipelines/base.py +14 -4
- paddlex/inference/pipelines/components/faisser.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
- paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
- paddlex/inference/pipelines/formula_recognition/result.py +1 -11
- paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +893 -260
- paddlex/inference/pipelines/layout_parsing/result.py +4 -17
- paddlex/inference/pipelines/layout_parsing/result_v2.py +523 -245
- paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +565 -1998
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
- paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
- paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ocr/pipeline.py +127 -70
- paddlex/inference/pipelines/ocr/result.py +19 -16
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +5 -5
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
- paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
- paddlex/inference/pipelines/table_recognition/result.py +1 -1
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
- paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
- paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
- paddlex/inference/serving/infra/utils.py +20 -22
- paddlex/inference/serving/schemas/formula_recognition.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
- paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
- paddlex/inference/serving/schemas/seal_recognition.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +2 -6
- paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
- paddlex/inference/utils/hpi.py +8 -1
- paddlex/inference/utils/hpi_model_info_collection.json +81 -2
- paddlex/inference/utils/io/readers.py +12 -12
- paddlex/inference/utils/mkldnn_blocklist.py +25 -0
- paddlex/inference/utils/official_models.py +14 -0
- paddlex/inference/utils/pp_option.py +29 -8
- paddlex/model.py +2 -2
- paddlex/modules/__init__.py +1 -1
- paddlex/modules/anomaly_detection/evaluator.py +2 -2
- paddlex/modules/base/__init__.py +1 -1
- paddlex/modules/base/evaluator.py +5 -5
- paddlex/modules/base/trainer.py +1 -1
- paddlex/modules/doc_vlm/dataset_checker.py +2 -2
- paddlex/modules/doc_vlm/evaluator.py +2 -2
- paddlex/modules/doc_vlm/exportor.py +2 -2
- paddlex/modules/doc_vlm/model_list.py +1 -1
- paddlex/modules/doc_vlm/trainer.py +2 -2
- paddlex/modules/face_recognition/evaluator.py +2 -2
- paddlex/modules/formula_recognition/evaluator.py +5 -2
- paddlex/modules/formula_recognition/model_list.py +3 -0
- paddlex/modules/formula_recognition/trainer.py +3 -0
- paddlex/modules/general_recognition/evaluator.py +1 -1
- paddlex/modules/image_classification/evaluator.py +2 -2
- paddlex/modules/image_classification/model_list.py +1 -0
- paddlex/modules/instance_segmentation/evaluator.py +1 -1
- paddlex/modules/keypoint_detection/evaluator.py +1 -1
- paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
- paddlex/modules/multilabel_classification/evaluator.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/object_detection/evaluator.py +2 -2
- paddlex/modules/object_detection/model_list.py +2 -0
- paddlex/modules/semantic_segmentation/evaluator.py +2 -2
- paddlex/modules/table_recognition/evaluator.py +2 -2
- paddlex/modules/text_detection/evaluator.py +2 -2
- paddlex/modules/text_detection/model_list.py +2 -0
- paddlex/modules/text_recognition/evaluator.py +2 -2
- paddlex/modules/text_recognition/model_list.py +2 -0
- paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/ts_classification/evaluator.py +2 -2
- paddlex/modules/ts_forecast/evaluator.py +2 -2
- paddlex/modules/video_classification/evaluator.py +2 -2
- paddlex/modules/video_detection/evaluator.py +2 -2
- paddlex/ops/__init__.py +2 -2
- paddlex/paddlex_cli.py +19 -13
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
- paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
- paddlex/repo_apis/base/config.py +1 -1
- paddlex/repo_manager/core.py +3 -3
- paddlex/repo_manager/meta.py +6 -2
- paddlex/repo_manager/repo.py +17 -16
- paddlex/utils/custom_device_list.py +26 -2
- paddlex/utils/deps.py +1 -1
- paddlex/utils/device.py +15 -8
- paddlex/utils/env.py +4 -0
- paddlex/utils/flags.py +2 -4
- paddlex/utils/fonts/__init__.py +34 -4
- paddlex/utils/misc.py +1 -1
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/METADATA +52 -56
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/RECORD +233 -206
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/licenses/LICENSE +0 -0
- {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
paddlex/inference/models/doc_vlm/processors/qwen2_vl.py:

@@ -12,15 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import base64
-import math
-from io import BytesIO
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Optional, Union
 
 import numpy as np
-import paddle
-import requests
-from PIL import Image
 
 from .....utils import logging
 from ....utils.benchmark import benchmark
@@ -33,10 +27,12 @@ from .common import (
     TensorType,
     TextInput,
     convert_to_rgb,
+    fetch_image,
     get_image_size,
     infer_channel_dimension_format,
-    is_valid_image,
+    make_batched_images,
     make_list_of_images,
+    smart_resize,
     to_channel_dimension_format,
     to_numpy_array,
     valid_images,
@@ -82,7 +78,7 @@ class Qwen2VLProcessor(object):
         self.image_processor.min_pixels = kwargs.get("min_pixels", 3136)
         self.image_processor.max_pixels = kwargs.get("max_pixels", 12845056)
 
-    def __call__(
+    def preprocess(
         self,
         images: ImageInput = None,
         text: Union[TextInput, List[TextInput]] = None,
@@ -182,33 +178,6 @@ class Qwen2VLProcessor(object):
         return self.tokenizer.decode(*args, **kwargs)
 
 
-def make_batched_images(images) -> List[List[ImageInput]]:
-    """
-    Accepts images in list or nested list format, and makes a list of images for preprocessing.
-
-    Args:
-        images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`):
-            The input image.
-
-    Returns:
-        list: A list of images.
-    """
-    if (
-        isinstance(images, (list, tuple))
-        and isinstance(images[0], (list, tuple))
-        and is_valid_image(images[0][0])
-    ):
-        return [img for img_list in images for img in img_list]
-
-    elif isinstance(images, (list, tuple)) and is_valid_image(images[0]):
-        return images
-
-    elif is_valid_image(images):
-        return [images]
-
-    raise ValueError(f"Could not make batched images from {images}")
-
-
 class Qwen2VLImageProcessor(object):
     r"""
     Constructs a Qwen2-VL image processor that dynamically resizes images based on the original images.
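The removed `make_batched_images` helper is not gone: per the import hunk above, 3.0.1 now imports it from `processors/common.py`. For reference, a self-contained sketch of the same flattening contract, where `valid` stands in for the library's `is_valid_image` check:

```python
# Minimal sketch of make_batched_images' flattening contract.
# `valid` is a stand-in for is_valid_image; here it only tests for PIL images.
from PIL import Image

def valid(obj):
    return isinstance(obj, Image.Image)

def make_batched(images):
    # nested list of images -> flat list
    if (
        isinstance(images, (list, tuple))
        and isinstance(images[0], (list, tuple))
        and valid(images[0][0])
    ):
        return [img for img_list in images for img in img_list]
    # already a flat list of images
    if isinstance(images, (list, tuple)) and valid(images[0]):
        return list(images)
    # a single image -> one-element list
    if valid(images):
        return [images]
    raise ValueError(f"Could not make batched images from {images}")

a, b = Image.new("RGB", (8, 8)), Image.new("RGB", (8, 8))
assert make_batched([[a], [b]]) == [a, b]
assert make_batched([a, b]) == [a, b]
assert make_batched(a) == [a]
```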
@@ -360,6 +329,7 @@ class Qwen2VLImageProcessor(object):
             factor=self.patch_size * self.merge_size,
             min_pixels=self.min_pixels,
             max_pixels=self.max_pixels,
+            max_ratio=MAX_RATIO,
         )
         image = image.astype("uint8")
         image = resize(
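The new `max_ratio` keyword threads the aspect-ratio guard into the resize call. A minimal sketch of the guard, grounded in the `smart_resize` check visible in the next hunk; the value `MAX_RATIO = 200` is an assumption, since the constant's definition is outside this diff:

```python
MAX_RATIO = 200  # assumed value; not shown in this diff

def check_aspect_ratio(height: int, width: int, max_ratio: float = MAX_RATIO) -> None:
    # mirrors the guard at the top of smart_resize below
    ratio = max(height, width) / min(height, width)
    if ratio > max_ratio:
        raise ValueError(
            f"absolute aspect ratio must be smaller than {max_ratio}, got {ratio}"
        )

check_aspect_ratio(1080, 1920)   # fine
# check_aspect_ratio(10, 4000)   # would raise: ratio 400 exceeds the cap
```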
@@ -527,159 +497,34 @@
         return self.preprocess(images, **kwargs)
 
 
-def round_by_factor(number: int, factor: int) -> int:
-    """Returns the closest integer to 'number' that is divisible by 'factor'."""
-    return round(number / factor) * factor
-
-
-def ceil_by_factor(number: int, factor: int) -> int:
-    """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'."""
-    return math.ceil(number / factor) * factor
-
-
-def floor_by_factor(number: int, factor: int) -> int:
-    """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'."""
-    return math.floor(number / factor) * factor
-
-
-def smart_resize(
-    height: int,
-    width: int,
-    factor: int = IMAGE_FACTOR,
-    min_pixels: int = MIN_PIXELS,
-    max_pixels: int = MAX_PIXELS,
-) -> Tuple[int, int]:
-    """
-    Rescales the image so that the following conditions are met:
-
-    1. Both dimensions (height and width) are divisible by 'factor'.
-
-    2. The total number of pixels is within the range ['min_pixels', 'max_pixels'].
-
-    3. The aspect ratio of the image is maintained as closely as possible.
-    """
-    if max(height, width) / min(height, width) > MAX_RATIO:
-        raise ValueError(
-            f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)}"
-        )
-    h_bar = max(factor, round_by_factor(height, factor))
-    w_bar = max(factor, round_by_factor(width, factor))
-    if h_bar * w_bar > max_pixels:
-        beta = math.sqrt((height * width) / max_pixels)
-        h_bar = floor_by_factor(height / beta, factor)
-        w_bar = floor_by_factor(width / beta, factor)
-    elif h_bar * w_bar < min_pixels:
-        beta = math.sqrt(min_pixels / (height * width))
-        h_bar = ceil_by_factor(height * beta, factor)
-        w_bar = ceil_by_factor(width * beta, factor)
-    return h_bar, w_bar
-
-
-def fetch_image(
-    ele: Dict[str, Union[str, Image.Image]], size_factor: int = IMAGE_FACTOR
-) -> Image.Image:
-    if not isinstance(ele, dict):
-        ele = {"image": ele}
-    if "image" in ele:
-        image = ele["image"]
-    else:
-        image = ele["image_url"]
-    image_obj = None
-    if isinstance(image, Image.Image):
-        image_obj = image
-    elif isinstance(image, np.ndarray):
-        image_obj = Image.fromarray(image)
-    elif image.startswith("http://") or image.startswith("https://"):
-        image_obj = Image.open(requests.get(image, stream=True).raw)
-    elif image.startswith("file://"):
-        image_obj = Image.open(image[7:])
-    elif image.startswith("data:image"):
-        data = image.split(";", 1)[1]
-        if data.startswith("base64,"):
-            data = base64.b64decode(data[7:])
-        image_obj = Image.open(BytesIO(data))
-    else:
-        image_obj = Image.open(image)
-    if image_obj is None:
-        raise ValueError(
-            f"Unrecognized image input, support local path, http url, base64 and PIL.Image, got {image}"
-        )
-    image = image_obj.convert("RGB")
-    # resize
-    if "resized_height" in ele and "resized_width" in ele:
-        resized_height, resized_width = smart_resize(
-            ele["resized_height"],
-            ele["resized_width"],
-            factor=size_factor,
-        )
-    else:
-        width, height = image.size  # Image, not tensor
-        min_pixels = ele.get("min_pixels", MIN_PIXELS)
-        max_pixels = ele.get("max_pixels", MAX_PIXELS)
-        resized_height, resized_width = smart_resize(
-            height,
-            width,
-            factor=size_factor,
-            min_pixels=min_pixels,
-            max_pixels=max_pixels,
-        )
-    image = image.resize((resized_width, resized_height))
-
-    return image
-
-
-def extract_vision_info(
-    conversations: Union[List[dict], List[List[dict]]]
-) -> List[dict]:
-    vision_infos = []
-    if isinstance(conversations[0], dict):
-        conversations = [conversations]
-    for conversation in conversations:
-        for message in conversation:
-            if isinstance(message["content"], list):
-                for ele in message["content"]:
-                    if (
-                        "image" in ele
-                        or "image_url" in ele
-                        or ele["type"] in ("image", "image_url")
-                    ):
-                        vision_infos.append(ele)
-    return vision_infos
-
-
-def process_vision_info(
-    conversations: Union[List[dict], List[List[dict]]],
-) -> Tuple[
-    Union[List[Image.Image], None, List[Union[paddle.Tensor, List[Image.Image]]], None]
-]:
-    vision_infos = extract_vision_info(conversations)
-    image_inputs = []
-    for vision_info in vision_infos:
-        if "image" in vision_info or "image_url" in vision_info:
-            image_inputs.append(fetch_image(vision_info))
-        else:
-            raise ValueError("image, image_url should in content.")
-    if len(image_inputs) == 0:
-        image_inputs = None
-    return image_inputs
-
-
 class PPDocBeeProcessor(Qwen2VLProcessor):
     """
     PP-DocBee processor, based on Qwen2VLProcessor
     """
 
     @benchmark.timeit
-    def preprocess(self, image: Union[str, Image.Image], query: str):
+    def preprocess(self, input_dicts):
         """
         PreProcess for PP-DocBee Series
         """
-        image_inputs = fetch_image({"image": image})
+        assert (
+            isinstance(input_dicts, list) and len(input_dicts) == 1
+        ), f"PP-DocBee series only supports batchsize of one, but received {len(input_dicts)} samples."
+        input_dict = input_dicts[0]
+        image = input_dict["image"]
+        query = input_dict["query"]
+        image_inputs = fetch_image(
+            image,
+            size_factor=IMAGE_FACTOR,
+            min_pixels=MIN_PIXELS,
+            max_pixels=MAX_PIXELS,
+            max_ratio=MAX_RATIO,
+        )
         image_pad_token = "<|vision_start|><|image_pad|><|vision_end|>"
         text = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{image_pad_token}{query}<|im_end|>\n<|im_start|>assistant\n"
         text = [text]
 
-        rst_inputs = super().__call__(
+        rst_inputs = super().preprocess(
            text=text,
            images=[image_inputs],
            padding=False,
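`smart_resize` and its rounding helpers likewise moved to `processors/common.py` rather than being deleted. A worked example of the math, using the min/max pixel defaults visible in `Qwen2VLProcessor` above; the factor of 28 and `MAX_RATIO = 200` are assumptions for the demo, since those constants are defined outside this diff:

```python
# Worked example of the smart_resize math shown in the removed block above.
# 3136 and 12845056 are the min/max pixel defaults visible in Qwen2VLProcessor;
# factor=28 and max_ratio=200 are assumed demo values.
import math

def round_by_factor(n, f): return round(n / f) * f
def ceil_by_factor(n, f): return math.ceil(n / f) * f
def floor_by_factor(n, f): return math.floor(n / f) * f

def smart_resize(height, width, factor=28, min_pixels=3136,
                 max_pixels=12845056, max_ratio=200):
    if max(height, width) / min(height, width) > max_ratio:
        raise ValueError("aspect ratio too extreme")
    h = max(factor, round_by_factor(height, factor))
    w = max(factor, round_by_factor(width, factor))
    if h * w > max_pixels:   # too many pixels: scale down onto the factor grid
        beta = math.sqrt((height * width) / max_pixels)
        h, w = floor_by_factor(height / beta, factor), floor_by_factor(width / beta, factor)
    elif h * w < min_pixels:  # too few pixels: scale up onto the factor grid
        beta = math.sqrt(min_pixels / (height * width))
        h, w = ceil_by_factor(height * beta, factor), ceil_by_factor(width * beta, factor)
    return h, w

print(smart_resize(1080, 1920))  # (1092, 1932): snapped to multiples of 28
print(smart_resize(10, 10))      # (56, 56): padded up to reach min_pixels
```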
paddlex/inference/models/formula_recognition/predictor.py:

@@ -97,7 +97,13 @@ class FormulaRecPredictor(BasePredictor):
             batch_imgs = self.pre_tfs["UniMERNetImgDecode"](imgs=batch_raw_imgs)
             batch_imgs = self.pre_tfs["UniMERNetTestTransform"](imgs=batch_imgs)
             batch_imgs = self.pre_tfs["UniMERNetImageFormat"](imgs=batch_imgs)
-        elif self.model_name in ("PP-FormulaNet-S", "PP-FormulaNet-L"):
+        elif self.model_name in (
+            "PP-FormulaNet-S",
+            "PP-FormulaNet-L",
+            "PP-FormulaNet_plus-S",
+            "PP-FormulaNet_plus-M",
+            "PP-FormulaNet_plus-L",
+        ):
             batch_imgs = self.pre_tfs["UniMERNetImgDecode"](imgs=batch_raw_imgs)
             batch_imgs = self.pre_tfs["UniMERNetTestTransform"](imgs=batch_imgs)
             batch_imgs = self.pre_tfs["LatexImageFormat"](imgs=batch_imgs)
paddlex/inference/models/formula_recognition/processors.py:

@@ -15,9 +15,7 @@
 
 import json
 import math
-import os
 import re
-import tempfile
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import numpy as np
@@ -325,14 +323,9 @@ class LaTeXOCRDecode(object):
         **kwargs: Additional keyword arguments for initialization.
         """
         super(LaTeXOCRDecode, self).__init__()
-        temp_path = tempfile.gettempdir()
-        rec_char_dict_path = os.path.join(temp_path, "latexocr_tokenizer.json")
-        try:
-            with open(rec_char_dict_path, "w") as f:
-                json.dump(character_list, f)
-        except Exception as e:
-            print(f"创建 latexocr_tokenizer.json 文件失败, 原因{str(e)}")
-        self.tokenizer = TokenizerFast.from_file(rec_char_dict_path)
+        fast_tokenizer_str = json.dumps(character_list)
+        fast_tokenizer_buffer = fast_tokenizer_str.encode("utf-8")
+        self.tokenizer = TokenizerFast.from_buffer(fast_tokenizer_buffer)
 
     def post_process(self, s: str) -> str:
         """Post-processes the decoded LaTeX string.
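Instead of dumping the character list to a temp file and reading it back with `TokenizerFast.from_file`, 3.0.1 feeds the serialized JSON straight into `from_buffer`. A minimal sketch, assuming `TokenizerFast` is the Rust-backed `tokenizers.Tokenizer` (an assumption; the diff shows only the alias). Building from a buffer also plausibly sidesteps clashes on a shared temp path when several predictors initialize concurrently:

```python
# Minimal sketch of the temp-file-free tokenizer construction, assuming
# TokenizerFast is tokenizers.Tokenizer. from_buffer() accepts the
# serialized tokenizer JSON as bytes, so nothing is written to disk.
import json
from tokenizers import Tokenizer

def build_tokenizer(tokenizer_dict: dict) -> Tokenizer:
    # tokenizer_dict is the parsed content of a tokenizer.json
    buffer = json.dumps(tokenizer_dict).encode("utf-8")
    return Tokenizer.from_buffer(buffer)
```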
@@ -372,7 +365,7 @@ class LaTeXOCRDecode(object):
         dec = [self.tokenizer.decode(tok) for tok in tokens]
         dec_str_list = [
             "".join(detok.split(" "))
-            .replace("
+            .replace("臓", " ")
             .replace("[EOS]", "")
             .replace("[BOS]", "")
             .replace("[PAD]", "")
@@ -631,74 +624,65 @@ class UniMERNetDecode(object):
         self.pad_token_type_id = 0
         self.pad_to_multiple_of = None
 
-        temp_path = tempfile.gettempdir()
-        fast_tokenizer_file = os.path.join(temp_path, "tokenizer.json")
-        tokenizer_config_file = os.path.join(temp_path, "tokenizer_config.json")
-        try:
-            with open(fast_tokenizer_file, "w") as f:
-                json.dump(character_list["fast_tokenizer_file"], f)
-            with open(tokenizer_config_file, "w") as f:
-                json.dump(character_list["tokenizer_config_file"], f)
-        except Exception as e:
-            print(
-                f"创建 tokenizer.json 和 tokenizer_config.json 文件失败, 原因{str(e)}"
-            )
-
-        self.tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
+        fast_tokenizer_str = json.dumps(character_list["fast_tokenizer_file"])
+        fast_tokenizer_buffer = fast_tokenizer_str.encode("utf-8")
+        self.tokenizer = TokenizerFast.from_buffer(fast_tokenizer_buffer)
+        tokenizer_config = (
+            character_list["tokenizer_config_file"]
+            if "tokenizer_config_file" in character_list
+            else None
+        )
         added_tokens_decoder = {}
         added_tokens_map = {}
-        if os.path.exists(tokenizer_config_file):
-            with open(
-                tokenizer_config_file, encoding="utf-8"
-            ) as tokenizer_config_handle:
-                init_kwargs = json.load(tokenizer_config_handle)
-            if "added_tokens_decoder" in init_kwargs:
-                for idx, token in init_kwargs["added_tokens_decoder"].items():
-                    if isinstance(token, dict):
-                        token = AddedToken(**token)
-                    if isinstance(token, AddedToken):
-                        added_tokens_decoder[int(idx)] = token
-                        added_tokens_map[str(token)] = token
-                    else:
-                        raise ValueError(
-                            f"Found a {token.__class__} in the saved `added_tokens_decoder`, should be a dictionary or an AddedToken instance"
-                        )
-        init_kwargs["added_tokens_decoder"] = added_tokens_decoder
-        added_tokens_decoder = init_kwargs.pop("added_tokens_decoder", {})
-        tokens_to_add = [
-            token
-            for index, token in sorted(
-                added_tokens_decoder.items(), key=lambda x: x[0]
-            )
-            if token not in added_tokens_decoder
-        ]
-        added_tokens_encoder = self.added_tokens_encoder(added_tokens_decoder)
-        encoder = list(added_tokens_encoder.keys()) + [
-            str(token) for token in tokens_to_add
-        ]
-        tokens_to_add += [
-            token
-            for token in self.all_special_tokens_extended
-            if token not in encoder and token not in tokens_to_add
-        ]
-        if len(tokens_to_add) > 0:
-            is_last_special = None
-            tokens = []
-            special_tokens = self.all_special_tokens
-            for token in tokens_to_add:
-                is_special = (
-                    (token.special or str(token) in special_tokens)
-                    if isinstance(token, AddedToken)
-                    else str(token) in special_tokens
-                )
-                if is_last_special is None or is_last_special == is_special:
-                    tokens.append(token)
-                else:
-                    self._add_tokens(tokens, special_tokens=is_last_special)
-                    tokens = [token]
-                is_last_special = is_special
-            if len(tokens) > 0:
-                self._add_tokens(tokens, special_tokens=is_last_special)
+        if tokenizer_config is not None:
+            init_kwargs = tokenizer_config
+            if "added_tokens_decoder" in init_kwargs:
+                for idx, token in init_kwargs["added_tokens_decoder"].items():
+                    if isinstance(token, dict):
+                        token = AddedToken(**token)
+                    if isinstance(token, AddedToken):
+                        added_tokens_decoder[int(idx)] = token
+                        added_tokens_map[str(token)] = token
+                    else:
+                        raise ValueError(
+                            f"Found a {token.__class__} in the saved `added_tokens_decoder`, should be a dictionary or an AddedToken instance"
+                        )
+            init_kwargs["added_tokens_decoder"] = added_tokens_decoder
+        added_tokens_decoder = init_kwargs.pop("added_tokens_decoder", {})
+        tokens_to_add = [
+            token
+            for index, token in sorted(
+                added_tokens_decoder.items(), key=lambda x: x[0]
+            )
+            if token not in added_tokens_decoder
+        ]
+        added_tokens_encoder = self.added_tokens_encoder(added_tokens_decoder)
+        encoder = list(added_tokens_encoder.keys()) + [
+            str(token) for token in tokens_to_add
+        ]
+        tokens_to_add += [
+            token
+            for token in self.all_special_tokens_extended
+            if token not in encoder and token not in tokens_to_add
+        ]
+        if len(tokens_to_add) > 0:
+            is_last_special = None
+            tokens = []
+            special_tokens = self.all_special_tokens
+            for token in tokens_to_add:
+                is_special = (
+                    (token.special or str(token) in special_tokens)
+                    if isinstance(token, AddedToken)
+                    else str(token) in special_tokens
+                )
+                if is_last_special is None or is_last_special == is_special:
+                    tokens.append(token)
+                else:
+                    self._add_tokens(tokens, special_tokens=is_last_special)
+                    tokens = [token]
+                    is_last_special = is_special
+            if tokens:
+                self._add_tokens(tokens, special_tokens=is_last_special)
 
     def _add_tokens(
         self, new_tokens: "List[Union[AddedToken, str]]", special_tokens: bool = False
@@ -814,7 +798,7 @@ class UniMERNetDecode(object):
         for i in reversed(range(len(toks[b]))):
             if toks[b][i] is None:
                 toks[b][i] = ""
-            toks[b][i] = toks[b][i].replace("
+            toks[b][i] = toks[b][i].replace("臓", " ").strip()
             if toks[b][i] in (
                 [
                     self.tokenizer.bos_token,
@@ -858,8 +842,27 @@ class UniMERNetDecode(object):
         text_reg = r"(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})"
         letter = "[a-zA-Z]"
         noletter = "[\W_^\d]"
-        names = ["".join(x[0].split(" ")) for x in re.findall(text_reg, s)]
-        s = re.sub(text_reg, lambda match: str(names.pop(0)), s)
+        names = []
+        for x in re.findall(text_reg, s):
+            pattern = r"\\[a-zA-Z]+"
+            pattern = r"(\\[a-zA-Z]+)\s(?=\w)|\\[a-zA-Z]+\s(?=})"
+            matches = re.findall(pattern, x[0])
+            for m in matches:
+                if (
+                    m
+                    not in [
+                        "\\operatorname",
+                        "\\mathrm",
+                        "\\text",
+                        "\\mathbf",
+                    ]
+                    and m.strip() != ""
+                ):
+                    s = s.replace(m, m + "XXXXXXX")
+            s = s.replace(" ", "")
+            names.append(s)
+        if len(names) > 0:
+            s = re.sub(text_reg, lambda match: str(names.pop(0)), s)
         news = s
         while True:
             s = news
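The `XXXXXXX` string is a sentinel: spaces that must survive (after a LaTeX command inside `\operatorname`/`\mathrm`/`\text`/`\mathbf` groups) are tagged before all whitespace is stripped, and the sentinel is turned back into a space in `normalize`'s return, visible in the next hunk. A simplified stand-in for the idea, not the exact UniMERNet regexes:

```python
# Sketch of the placeholder trick: protect significant spaces with a
# sentinel, strip all remaining spaces, then restore the protected ones.
import re

SENTINEL = "XXXXXXX"

def strip_spaces_keeping_command_breaks(s: str) -> str:
    # keep the space separating a command like \alpha from a following letter
    s = re.sub(r"(\\[a-zA-Z]+)\s(?=\w)", r"\1" + SENTINEL, s)
    s = s.replace(" ", "")           # drop all remaining spaces
    return s.replace(SENTINEL, " ")  # restore the protected ones

print(strip_spaces_keeping_command_breaks(r"\alpha x + y"))  # "\alpha x+y"
```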
@@ -868,7 +871,16 @@ class UniMERNetDecode(object):
             news = re.sub(r"(%s)\s+?(%s)" % (letter, noletter), r"\1\2", news)
             if news == s:
                 break
-        return s
+        return s.replace("XXXXXXX", " ")
+
+    def remove_chinese_text_wrapping(self, formula):
+        pattern = re.compile(r"\\text\s*{\s*([^}]*?[\u4e00-\u9fff]+[^}]*?)\s*}")
+
+        def replacer(match):
+            return match.group(1)
+
+        replaced_formula = pattern.sub(replacer, formula)
+        return replaced_formula.replace('"', "")
 
     def post_process(self, text: str) -> str:
         """Post-processes a string by fixing text and normalizing it.
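What the new `remove_chinese_text_wrapping` does, on a small input: it unwraps `\text{...}` groups that contain CJK characters (and drops stray double quotes), leaving ASCII-only groups alone:

```python
# Quick check of remove_chinese_text_wrapping's regex behavior.
import re

pattern = re.compile(r"\\text\s*{\s*([^}]*?[\u4e00-\u9fff]+[^}]*?)\s*}")
formula = r"E = mc^2 \text{ 能量守恒 } + \text{rate}"
print(pattern.sub(lambda m: m.group(1), formula))
# E = mc^2 能量守恒 + \text{rate}
```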
@@ -881,6 +893,7 @@ class UniMERNetDecode(object):
         """
         from ftfy import fix_text
 
+        text = self.remove_chinese_text_wrapping(text)
         text = fix_text(text)
         text = self.normalize(text)
         return text
paddlex/inference/models/formula_recognition/result.py:

@@ -15,9 +15,9 @@
 import copy
 import math
 import os
+import re
 import subprocess
 import tempfile
-from pathlib import Path
 from typing import List, Optional
 
 import numpy as np
@@ -32,19 +32,11 @@ from ...common.result import BaseCVResult, JsonMixin
 
 if is_dep_available("opencv-contrib-python"):
     import cv2
-if is_dep_available("PyMuPDF"):
-    import fitz
+if is_dep_available("pypdfium2"):
+    import pypdfium2 as pdfium
 
 
 class FormulaRecResult(BaseCVResult):
-    def _get_input_fn(self):
-        fn = super()._get_input_fn()
-        if (page_idx := self["page_index"]) is not None:
-            fp = Path(fn)
-            stem, suffix = fp.stem, fp.suffix
-            return f"{stem}_{page_idx}{suffix}"
-        else:
-            return fn
 
     def _to_str(self, *args, **kwargs):
         data = copy.deepcopy(self)
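The removed `_get_input_fn` override derived per-page output names from the `page_index` key. The naming scheme it implemented, for reference:

```python
# Naming scheme of the removed override: suffix the stem with the page index.
from pathlib import Path

fn, page_idx = "doc.pdf", 3
fp = Path(fn)
print(f"{fp.stem}_{page_idx}{fp.suffix}")  # doc_3.pdf
```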
@@ -126,6 +118,7 @@ def get_align_equation(equation: str) -> str:
     """
     is_align = False
     equation = str(equation) + "\n"
+
     begin_dict = [
         r"begin{align}",
         r"begin{align*}",
@@ -147,6 +140,17 @@ def get_align_equation(equation: str) -> str:
     return equation
 
 
+def add_text_for_zh_formula(formula: str) -> str:
+    pattern = re.compile(r"([^\x00-\x7F]+)")
+
+    def replacer(match):
+        return f"\\text{{{match.group(1)}}}"
+
+    replaced_formula = pattern.sub(replacer, formula)
+
+    return replaced_formula
+
+
 def generate_tex_file(tex_file_path: str, equation: str) -> None:
     """
     Generates a LaTeX file containing a specific equation.
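`add_text_for_zh_formula` wraps every run of non-ASCII characters in `\text{...}` so that xeCJK (added to the template in the next hunk) can typeset it inside math mode. On a small input:

```python
# What add_text_for_zh_formula does before the .tex file is written.
import re

pattern = re.compile(r"([^\x00-\x7F]+)")
print(pattern.sub(lambda m: f"\\text{{{m.group(1)}}}", r"F = ma 牛顿第二定律"))
# F = ma \text{牛顿第二定律}
```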
@@ -161,17 +165,19 @@ def generate_tex_file(tex_file_path: str, equation: str) -> None:
     """
     with custom_open(tex_file_path, "w") as fp:
         start_template = (
-            r"\documentclass{article}" + "\n"
+            r"\documentclass[varwidth]{standalone}" + "\n"
             r"\usepackage{cite}" + "\n"
             r"\usepackage{amsmath,amssymb,amsfonts,upgreek}" + "\n"
             r"\usepackage{graphicx}" + "\n"
             r"\usepackage{textcomp}" + "\n"
+            r"\usepackage{xeCJK}" + "\n"
             r"\DeclareMathSizes{14}{14}{9.8}{7}" + "\n"
             r"\pagestyle{empty}" + "\n"
             r"\begin{document}" + "\n"
             r"\begin{large}" + "\n"
         )
         fp.write(start_template)
+        equation = add_text_for_zh_formula(equation)
         equation = get_align_equation(equation)
         fp.write(equation)
         end_template = r"\end{large}" + "\n" r"\end{document}" + "\n"
@@ -197,7 +203,7 @@ def generate_pdf_file(
         and None if an error occurred during the pdflatex execution.
     """
     if os.path.exists(tex_path):
-        command = "pdflatex -interaction=nonstopmode -halt-on-error -output-directory={} {}".format(
+        command = "xelatex -interaction=nonstopmode -halt-on-error -output-directory={} {}".format(
             pdf_dir, tex_path
         )
         if is_debug:
@@ -236,7 +242,7 @@ def crop_white_area(image: np.ndarray) -> Optional[List[int]]:
         return None
 
 
-@function_requires_deps("PyMuPDF", "opencv-contrib-python")
+@function_requires_deps("pypdfium2", "opencv-contrib-python")
 def pdf2img(pdf_path: str, img_path: str, is_padding: bool = False):
     """
     Converts a single-page PDF to an image, optionally cropping white areas and adding padding.
@@ -249,21 +255,16 @@ def pdf2img(pdf_path: str, img_path: str, is_padding: bool = False):
     Returns:
         np.ndarray: The resulting image as a NumPy array, or None if the PDF is not single-page.
     """
-
-    pdfDoc = fitz.open(pdf_path)
-    if pdfDoc.page_count != 1:
+    pdfDoc = pdfium.PdfDocument(pdf_path)
+    if len(pdfDoc) != 1:
         return None
-    for pg in range(pdfDoc.page_count):
-        page = pdfDoc[pg]
+    for page in pdfDoc:
         rotate = int(0)
-        zoom_x = 2
-        zoom_y = 2
-        mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)
-        pix = page.get_pixmap(matrix=mat, alpha=False)
-        getpngdata = pix.tobytes(output="png")
-        # decode as np.uint8
-        image_array = np.frombuffer(getpngdata, dtype=np.uint8)
-        img = cv2.imdecode(image_array, cv2.IMREAD_ANYCOLOR)
+        zoom = 2
+        img = page.render(scale=zoom, rotation=rotate).to_pil()
+        img = img.convert("RGB")
+        img = np.array(img)
+        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
     xywh = crop_white_area(img)
 
     if xywh is not None:
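Taken together, the three changes in this file (the `standalone`/xeCJK template, the switch from pdflatex to xelatex, and the move from PyMuPDF to pypdfium2) form the LaTeX-to-image path used to visualize recognized formulas. A condensed sketch of that path under stated assumptions: `xelatex` must be on PATH, `pypdfium2` installed, and `render_formula` plus its template are illustrative names, not the library's API:

```python
# Condensed sketch of the render path after this change: write a standalone
# xeCJK .tex, compile with xelatex, rasterize page 1 with pypdfium2.
import os
import subprocess
import tempfile

import numpy as np
import pypdfium2 as pdfium

TEMPLATE = (
    "\\documentclass[varwidth]{standalone}\n\\usepackage{amsmath}\n"
    "\\usepackage{xeCJK}\n\\begin{document}\n$%s$\n\\end{document}\n"
)

def render_formula(latex: str, scale: int = 2) -> np.ndarray:
    workdir = tempfile.mkdtemp()
    tex_path = os.path.join(workdir, "eq.tex")
    with open(tex_path, "w", encoding="utf-8") as f:
        f.write(TEMPLATE % latex)
    subprocess.run(
        ["xelatex", "-interaction=nonstopmode", "-halt-on-error",
         f"-output-directory={workdir}", tex_path],
        check=True, capture_output=True,
    )
    pdf = pdfium.PdfDocument(os.path.join(workdir, "eq.pdf"))
    img = pdf[0].render(scale=scale).to_pil()
    return np.array(img.convert("RGB"))

# arr = render_formula(r"E = mc^2")  # requires a TeX installation
```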
paddlex/inference/models/image_feature/processors.py:

@@ -23,10 +23,9 @@ class NormalizeFeatures:
 
     def _normalize(self, preds):
         """normalize"""
-        feas_norm = np.sqrt(np.sum(np.square(preds[0]), axis=1, keepdims=True))
-        features = np.divide(preds[0], feas_norm)
+        feas_norm = np.sqrt(np.sum(np.square(preds), axis=1, keepdims=True))
+        features = np.divide(preds, feas_norm)
         return features
 
     def __call__(self, preds):
-        normalized_features = self._normalize(preds)
-        return normalized_features
+        return self._normalize(preds[0])
paddlex/inference/models/object_detection/predictor.py:

@@ -316,6 +316,8 @@ class DetPredictor(BasePredictor):
             "BlazeFace",
             "BlazeFace-FPN-SSH",
             "PP-DocLayout-L",
+            "PP-DocLayout_plus-L",
+            "PP-DocBlockLayout",
         ]
         if any(name in self.model_name for name in models_required_imgsize):
             ordered_required_keys = (
|