paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +17 -34
- paddlex/__main__.py +1 -1
- paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee-2B.yaml +14 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee-7B.yaml +14 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
- paddlex/configs/modules/open_vocabulary_detection/YOLO-Worldv2-L.yaml +13 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
- paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
- paddlex/configs/pipelines/OCR.yaml +7 -6
- paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
- paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
- paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
- paddlex/configs/pipelines/anomaly_detection.yaml +1 -1
- paddlex/configs/pipelines/doc_understanding.yaml +9 -0
- paddlex/configs/pipelines/formula_recognition.yaml +2 -2
- paddlex/configs/pipelines/layout_parsing.yaml +3 -2
- paddlex/configs/pipelines/seal_recognition.yaml +1 -0
- paddlex/configs/pipelines/table_recognition.yaml +2 -1
- paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
- paddlex/configs/pipelines/ts_anomaly_detection.yaml +1 -1
- paddlex/configs/pipelines/ts_classification.yaml +1 -1
- paddlex/configs/pipelines/ts_forecast.yaml +1 -1
- paddlex/constants.py +17 -0
- paddlex/engine.py +7 -5
- paddlex/hpip_links.html +23 -11
- paddlex/inference/__init__.py +3 -3
- paddlex/inference/common/__init__.py +1 -1
- paddlex/inference/common/batch_sampler/__init__.py +5 -4
- paddlex/inference/common/batch_sampler/audio_batch_sampler.py +5 -6
- paddlex/inference/common/batch_sampler/base_batch_sampler.py +20 -16
- paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +4 -7
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +87 -0
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +45 -60
- paddlex/inference/common/batch_sampler/ts_batch_sampler.py +9 -10
- paddlex/inference/common/batch_sampler/video_batch_sampler.py +2 -22
- paddlex/inference/common/reader/__init__.py +4 -4
- paddlex/inference/common/reader/audio_reader.py +3 -3
- paddlex/inference/common/reader/det_3d_reader.py +7 -5
- paddlex/inference/common/reader/image_reader.py +16 -12
- paddlex/inference/common/reader/ts_reader.py +3 -2
- paddlex/inference/common/reader/video_reader.py +3 -3
- paddlex/inference/common/result/__init__.py +7 -7
- paddlex/inference/common/result/base_cv_result.py +12 -2
- paddlex/inference/common/result/base_result.py +7 -5
- paddlex/inference/common/result/base_ts_result.py +1 -2
- paddlex/inference/common/result/base_video_result.py +2 -2
- paddlex/inference/common/result/mixin.py +31 -25
- paddlex/inference/models/__init__.py +41 -85
- paddlex/inference/models/anomaly_detection/__init__.py +1 -1
- paddlex/inference/models/anomaly_detection/predictor.py +9 -19
- paddlex/inference/models/anomaly_detection/processors.py +9 -2
- paddlex/inference/models/anomaly_detection/result.py +3 -2
- paddlex/inference/models/base/__init__.py +2 -2
- paddlex/inference/models/base/predictor/__init__.py +1 -2
- paddlex/inference/models/base/predictor/base_predictor.py +278 -39
- paddlex/inference/models/common/__init__.py +6 -15
- paddlex/inference/models/common/static_infer.py +724 -251
- paddlex/inference/models/common/tokenizer/__init__.py +7 -3
- paddlex/inference/models/common/tokenizer/bert_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +609 -0
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +9 -7
- paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +438 -0
- paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +85 -77
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +339 -123
- paddlex/inference/models/common/tokenizer/utils.py +1 -1
- paddlex/inference/models/common/tokenizer/vocab.py +8 -8
- paddlex/inference/models/common/ts/__init__.py +1 -1
- paddlex/inference/models/common/ts/funcs.py +13 -6
- paddlex/inference/models/common/ts/processors.py +14 -5
- paddlex/inference/models/common/vision/__init__.py +3 -3
- paddlex/inference/models/common/vision/funcs.py +17 -12
- paddlex/inference/models/common/vision/processors.py +61 -46
- paddlex/inference/models/common/vlm/__init__.py +13 -0
- paddlex/inference/models/common/vlm/activations.py +189 -0
- paddlex/inference/models/common/vlm/bert_padding.py +127 -0
- paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
- paddlex/inference/models/common/vlm/distributed.py +229 -0
- paddlex/inference/models/common/vlm/flash_attn_utils.py +119 -0
- paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
- paddlex/inference/models/common/vlm/generation/__init__.py +34 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +533 -0
- paddlex/inference/models/common/vlm/generation/logits_process.py +730 -0
- paddlex/inference/models/common/vlm/generation/stopping_criteria.py +106 -0
- paddlex/inference/models/common/vlm/generation/utils.py +2162 -0
- paddlex/inference/models/common/vlm/transformers/__init__.py +16 -0
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +1037 -0
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +408 -0
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +1612 -0
- paddlex/inference/models/common/vlm/transformers/model_utils.py +2014 -0
- paddlex/inference/models/common/vlm/transformers/utils.py +178 -0
- paddlex/inference/models/common/vlm/utils.py +109 -0
- paddlex/inference/models/doc_vlm/__init__.py +15 -0
- paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +17 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +2495 -0
- paddlex/inference/models/doc_vlm/predictor.py +253 -0
- paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +17 -0
- paddlex/inference/models/doc_vlm/processors/common.py +561 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +543 -0
- paddlex/inference/models/doc_vlm/result.py +21 -0
- paddlex/inference/models/face_feature/__init__.py +1 -1
- paddlex/inference/models/face_feature/predictor.py +2 -1
- paddlex/inference/models/formula_recognition/__init__.py +1 -1
- paddlex/inference/models/formula_recognition/predictor.py +18 -28
- paddlex/inference/models/formula_recognition/processors.py +126 -97
- paddlex/inference/models/formula_recognition/result.py +43 -35
- paddlex/inference/models/image_classification/__init__.py +1 -1
- paddlex/inference/models/image_classification/predictor.py +9 -19
- paddlex/inference/models/image_classification/processors.py +4 -2
- paddlex/inference/models/image_classification/result.py +4 -3
- paddlex/inference/models/image_feature/__init__.py +1 -1
- paddlex/inference/models/image_feature/predictor.py +9 -19
- paddlex/inference/models/image_feature/processors.py +7 -5
- paddlex/inference/models/image_feature/result.py +2 -3
- paddlex/inference/models/image_multilabel_classification/__init__.py +1 -1
- paddlex/inference/models/image_multilabel_classification/predictor.py +7 -6
- paddlex/inference/models/image_multilabel_classification/processors.py +6 -2
- paddlex/inference/models/image_multilabel_classification/result.py +4 -3
- paddlex/inference/models/image_unwarping/__init__.py +1 -1
- paddlex/inference/models/image_unwarping/predictor.py +8 -16
- paddlex/inference/models/image_unwarping/processors.py +6 -2
- paddlex/inference/models/image_unwarping/result.py +4 -2
- paddlex/inference/models/instance_segmentation/__init__.py +1 -1
- paddlex/inference/models/instance_segmentation/predictor.py +7 -15
- paddlex/inference/models/instance_segmentation/processors.py +4 -7
- paddlex/inference/models/instance_segmentation/result.py +11 -10
- paddlex/inference/models/keypoint_detection/__init__.py +1 -1
- paddlex/inference/models/keypoint_detection/predictor.py +5 -3
- paddlex/inference/models/keypoint_detection/processors.py +11 -3
- paddlex/inference/models/keypoint_detection/result.py +9 -4
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/predictor.py +15 -26
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/processors.py +26 -14
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/result.py +15 -12
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/visualizer_3d.py +77 -39
- paddlex/inference/models/multilingual_speech_recognition/__init__.py +1 -1
- paddlex/inference/models/multilingual_speech_recognition/predictor.py +11 -15
- paddlex/inference/models/multilingual_speech_recognition/processors.py +45 -53
- paddlex/inference/models/multilingual_speech_recognition/result.py +1 -1
- paddlex/inference/models/object_detection/__init__.py +1 -1
- paddlex/inference/models/object_detection/predictor.py +8 -12
- paddlex/inference/models/object_detection/processors.py +63 -33
- paddlex/inference/models/object_detection/result.py +5 -4
- paddlex/inference/models/object_detection/utils.py +3 -1
- paddlex/inference/models/open_vocabulary_detection/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_detection/predictor.py +31 -14
- paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +3 -2
- paddlex/inference/models/open_vocabulary_detection/processors/common.py +114 -0
- paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +19 -8
- paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py +209 -0
- paddlex/inference/models/open_vocabulary_segmentation/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/predictor.py +6 -13
- paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +12 -12
- paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +11 -9
- paddlex/inference/models/semantic_segmentation/__init__.py +1 -1
- paddlex/inference/models/semantic_segmentation/predictor.py +9 -18
- paddlex/inference/models/semantic_segmentation/processors.py +11 -8
- paddlex/inference/models/semantic_segmentation/result.py +4 -3
- paddlex/inference/models/table_structure_recognition/__init__.py +1 -1
- paddlex/inference/models/table_structure_recognition/predictor.py +8 -18
- paddlex/inference/models/table_structure_recognition/processors.py +23 -29
- paddlex/inference/models/table_structure_recognition/result.py +8 -15
- paddlex/inference/models/text_detection/__init__.py +1 -1
- paddlex/inference/models/text_detection/predictor.py +24 -24
- paddlex/inference/models/text_detection/processors.py +116 -44
- paddlex/inference/models/text_detection/result.py +8 -13
- paddlex/inference/models/text_recognition/__init__.py +1 -1
- paddlex/inference/models/text_recognition/predictor.py +11 -19
- paddlex/inference/models/text_recognition/processors.py +27 -13
- paddlex/inference/models/text_recognition/result.py +3 -2
- paddlex/inference/models/ts_anomaly_detection/__init__.py +1 -1
- paddlex/inference/models/ts_anomaly_detection/predictor.py +12 -17
- paddlex/inference/models/ts_anomaly_detection/processors.py +6 -2
- paddlex/inference/models/ts_anomaly_detection/result.py +21 -10
- paddlex/inference/models/ts_classification/__init__.py +1 -1
- paddlex/inference/models/ts_classification/predictor.py +14 -27
- paddlex/inference/models/ts_classification/processors.py +7 -2
- paddlex/inference/models/ts_classification/result.py +21 -12
- paddlex/inference/models/ts_forecasting/__init__.py +1 -1
- paddlex/inference/models/ts_forecasting/predictor.py +13 -18
- paddlex/inference/models/ts_forecasting/processors.py +12 -3
- paddlex/inference/models/ts_forecasting/result.py +24 -11
- paddlex/inference/models/video_classification/__init__.py +1 -1
- paddlex/inference/models/video_classification/predictor.py +9 -15
- paddlex/inference/models/video_classification/processors.py +24 -24
- paddlex/inference/models/video_classification/result.py +7 -3
- paddlex/inference/models/video_detection/__init__.py +1 -1
- paddlex/inference/models/video_detection/predictor.py +8 -15
- paddlex/inference/models/video_detection/processors.py +24 -11
- paddlex/inference/models/video_detection/result.py +10 -5
- paddlex/inference/pipelines/__init__.py +48 -37
- paddlex/inference/pipelines/_parallel.py +172 -0
- paddlex/inference/pipelines/anomaly_detection/__init__.py +1 -1
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +29 -9
- paddlex/inference/pipelines/attribute_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +24 -9
- paddlex/inference/pipelines/attribute_recognition/result.py +10 -8
- paddlex/inference/pipelines/base.py +43 -13
- paddlex/inference/pipelines/components/__init__.py +14 -8
- paddlex/inference/pipelines/components/chat_server/__init__.py +1 -1
- paddlex/inference/pipelines/components/chat_server/base.py +2 -2
- paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +8 -8
- paddlex/inference/pipelines/components/common/__init__.py +5 -4
- paddlex/inference/pipelines/components/common/base_operator.py +2 -1
- paddlex/inference/pipelines/components/common/base_result.py +3 -2
- paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +1 -2
- paddlex/inference/pipelines/components/common/crop_image_regions.py +11 -5
- paddlex/inference/pipelines/components/common/seal_det_warp.py +44 -13
- paddlex/inference/pipelines/components/common/sort_boxes.py +4 -2
- paddlex/inference/pipelines/components/common/warp_image.py +50 -0
- paddlex/inference/pipelines/components/faisser.py +10 -5
- paddlex/inference/pipelines/components/prompt_engineering/__init__.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/base.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +2 -1
- paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +2 -2
- paddlex/inference/pipelines/components/retriever/__init__.py +2 -2
- paddlex/inference/pipelines/components/retriever/base.py +18 -16
- paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +2 -2
- paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +87 -84
- paddlex/inference/pipelines/components/utils/__init__.py +1 -1
- paddlex/inference/pipelines/components/utils/mixin.py +7 -7
- paddlex/inference/pipelines/doc_preprocessor/__init__.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +70 -51
- paddlex/inference/pipelines/doc_preprocessor/result.py +5 -10
- paddlex/inference/pipelines/doc_understanding/__init__.py +15 -0
- paddlex/inference/pipelines/doc_understanding/pipeline.py +71 -0
- paddlex/inference/pipelines/face_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/face_recognition/pipeline.py +3 -1
- paddlex/inference/pipelines/face_recognition/result.py +3 -2
- paddlex/inference/pipelines/formula_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/formula_recognition/pipeline.py +137 -93
- paddlex/inference/pipelines/formula_recognition/result.py +20 -29
- paddlex/inference/pipelines/image_classification/__init__.py +1 -1
- paddlex/inference/pipelines/image_classification/pipeline.py +30 -11
- paddlex/inference/pipelines/image_multilabel_classification/__init__.py +1 -1
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +31 -12
- paddlex/inference/pipelines/instance_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +30 -9
- paddlex/inference/pipelines/keypoint_detection/__init__.py +1 -1
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/layout_parsing/__init__.py +1 -1
- paddlex/inference/pipelines/layout_parsing/pipeline.py +54 -56
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +904 -261
- paddlex/inference/pipelines/layout_parsing/result.py +9 -21
- paddlex/inference/pipelines/layout_parsing/result_v2.py +525 -250
- paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +570 -2004
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
- paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
- paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/pipeline.py +17 -10
- paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +17 -6
- paddlex/inference/pipelines/object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/object_detection/pipeline.py +29 -9
- paddlex/inference/pipelines/ocr/__init__.py +1 -1
- paddlex/inference/pipelines/ocr/pipeline.py +151 -77
- paddlex/inference/pipelines/ocr/result.py +31 -24
- paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +1 -1
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +17 -6
- paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +17 -6
- paddlex/inference/pipelines/pp_chatocr/__init__.py +1 -1
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +14 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +22 -14
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +34 -16
- paddlex/inference/pipelines/pp_shitu_v2/__init__.py +1 -1
- paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +12 -8
- paddlex/inference/pipelines/pp_shitu_v2/result.py +4 -4
- paddlex/inference/pipelines/rotated_object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/seal_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/seal_recognition/pipeline.py +127 -63
- paddlex/inference/pipelines/seal_recognition/result.py +4 -2
- paddlex/inference/pipelines/semantic_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +30 -9
- paddlex/inference/pipelines/small_object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/small_object_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/table_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/table_recognition/pipeline.py +61 -37
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +668 -65
- paddlex/inference/pipelines/table_recognition/result.py +12 -10
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +12 -8
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +55 -37
- paddlex/inference/pipelines/table_recognition/utils.py +1 -1
- paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +1 -1
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ts_classification/__init__.py +1 -1
- paddlex/inference/pipelines/ts_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/ts_forecasting/__init__.py +1 -1
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +16 -6
- paddlex/inference/pipelines/video_classification/__init__.py +1 -1
- paddlex/inference/pipelines/video_classification/pipeline.py +17 -6
- paddlex/inference/pipelines/video_detection/__init__.py +1 -1
- paddlex/inference/pipelines/video_detection/pipeline.py +20 -7
- paddlex/inference/serving/__init__.py +5 -1
- paddlex/inference/serving/basic_serving/__init__.py +1 -1
- paddlex/inference/serving/basic_serving/_app.py +31 -19
- paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +7 -4
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +12 -4
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +7 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py +153 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +13 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +10 -8
- paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +13 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +17 -14
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +16 -9
- paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +11 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +14 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_server.py +9 -4
- paddlex/inference/serving/infra/__init__.py +1 -1
- paddlex/inference/serving/infra/config.py +1 -1
- paddlex/inference/serving/infra/models.py +13 -6
- paddlex/inference/serving/infra/storage.py +9 -4
- paddlex/inference/serving/infra/utils.py +54 -28
- paddlex/inference/serving/schemas/__init__.py +1 -1
- paddlex/inference/serving/schemas/anomaly_detection.py +1 -1
- paddlex/inference/serving/schemas/doc_preprocessor.py +1 -1
- paddlex/inference/serving/schemas/doc_understanding.py +78 -0
- paddlex/inference/serving/schemas/face_recognition.py +1 -1
- paddlex/inference/serving/schemas/formula_recognition.py +2 -2
- paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -1
- paddlex/inference/serving/schemas/image_classification.py +1 -1
- paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -1
- paddlex/inference/serving/schemas/instance_segmentation.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +2 -3
- paddlex/inference/serving/schemas/m_3d_bev_detection.py +1 -1
- paddlex/inference/serving/schemas/multilingual_speech_recognition.py +1 -1
- paddlex/inference/serving/schemas/object_detection.py +1 -1
- paddlex/inference/serving/schemas/ocr.py +1 -1
- paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -1
- paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -1
- paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -1
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +2 -3
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +3 -3
- paddlex/inference/serving/schemas/pp_shituv2.py +1 -1
- paddlex/inference/serving/schemas/pp_structurev3.py +11 -7
- paddlex/inference/serving/schemas/rotated_object_detection.py +1 -1
- paddlex/inference/serving/schemas/seal_recognition.py +2 -2
- paddlex/inference/serving/schemas/semantic_segmentation.py +1 -1
- paddlex/inference/serving/schemas/shared/__init__.py +1 -1
- paddlex/inference/serving/schemas/shared/classification.py +1 -1
- paddlex/inference/serving/schemas/shared/image_segmentation.py +1 -1
- paddlex/inference/serving/schemas/shared/object_detection.py +1 -1
- paddlex/inference/serving/schemas/shared/ocr.py +1 -1
- paddlex/inference/serving/schemas/small_object_detection.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +3 -7
- paddlex/inference/serving/schemas/table_recognition_v2.py +6 -7
- paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -1
- paddlex/inference/serving/schemas/ts_classification.py +1 -1
- paddlex/inference/serving/schemas/ts_forecast.py +1 -1
- paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -1
- paddlex/inference/serving/schemas/video_classification.py +1 -1
- paddlex/inference/serving/schemas/video_detection.py +1 -1
- paddlex/inference/utils/__init__.py +1 -1
- paddlex/inference/utils/benchmark.py +332 -179
- paddlex/inference/utils/color_map.py +1 -1
- paddlex/inference/utils/get_pipeline_path.py +1 -1
- paddlex/inference/utils/hpi.py +258 -0
- paddlex/inference/utils/hpi_model_info_collection.json +2331 -0
- paddlex/inference/utils/io/__init__.py +11 -11
- paddlex/inference/utils/io/readers.py +31 -27
- paddlex/inference/utils/io/style.py +21 -14
- paddlex/inference/utils/io/tablepyxl.py +13 -5
- paddlex/inference/utils/io/writers.py +9 -10
- paddlex/inference/utils/mkldnn_blocklist.py +25 -0
- paddlex/inference/utils/model_paths.py +48 -0
- paddlex/inference/utils/{new_ir_blacklist.py → new_ir_blocklist.py} +1 -2
- paddlex/inference/utils/official_models.py +278 -262
- paddlex/inference/utils/pp_option.py +184 -92
- paddlex/inference/utils/trt_blocklist.py +43 -0
- paddlex/inference/utils/trt_config.py +420 -0
- paddlex/model.py +30 -12
- paddlex/modules/__init__.py +57 -80
- paddlex/modules/anomaly_detection/__init__.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/__init__.py +2 -3
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +6 -3
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +8 -4
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +7 -4
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +7 -2
- paddlex/modules/anomaly_detection/evaluator.py +3 -3
- paddlex/modules/anomaly_detection/exportor.py +1 -1
- paddlex/modules/anomaly_detection/model_list.py +1 -1
- paddlex/modules/anomaly_detection/trainer.py +3 -4
- paddlex/modules/base/__init__.py +5 -5
- paddlex/modules/base/build_model.py +1 -2
- paddlex/modules/base/dataset_checker/__init__.py +2 -2
- paddlex/modules/base/dataset_checker/dataset_checker.py +4 -4
- paddlex/modules/base/dataset_checker/utils.py +1 -3
- paddlex/modules/base/evaluator.py +13 -13
- paddlex/modules/base/exportor.py +12 -13
- paddlex/modules/base/trainer.py +21 -11
- paddlex/modules/base/utils/__init__.py +13 -0
- paddlex/modules/base/utils/cinn_setting.py +89 -0
- paddlex/modules/base/utils/coco_eval.py +94 -0
- paddlex/modules/base/utils/topk_eval.py +118 -0
- paddlex/modules/doc_vlm/__init__.py +18 -0
- paddlex/modules/doc_vlm/dataset_checker.py +29 -0
- paddlex/modules/doc_vlm/evaluator.py +29 -0
- paddlex/modules/doc_vlm/exportor.py +29 -0
- paddlex/modules/doc_vlm/model_list.py +16 -0
- paddlex/modules/doc_vlm/trainer.py +41 -0
- paddlex/modules/face_recognition/__init__.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/__init__.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +1 -1
- paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +3 -5
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/face_recognition/evaluator.py +3 -3
- paddlex/modules/face_recognition/exportor.py +1 -1
- paddlex/modules/face_recognition/model_list.py +1 -1
- paddlex/modules/face_recognition/trainer.py +1 -1
- paddlex/modules/formula_recognition/__init__.py +2 -2
- paddlex/modules/formula_recognition/dataset_checker/__init__.py +3 -3
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +2 -6
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
- paddlex/modules/formula_recognition/evaluator.py +6 -3
- paddlex/modules/formula_recognition/exportor.py +1 -1
- paddlex/modules/formula_recognition/model_list.py +4 -1
- paddlex/modules/formula_recognition/trainer.py +5 -3
- paddlex/modules/general_recognition/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +7 -9
- paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +4 -5
- paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +6 -5
- paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/general_recognition/evaluator.py +2 -2
- paddlex/modules/general_recognition/exportor.py +1 -1
- paddlex/modules/general_recognition/model_list.py +1 -1
- paddlex/modules/general_recognition/trainer.py +1 -1
- paddlex/modules/image_classification/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
- paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/image_classification/evaluator.py +3 -3
- paddlex/modules/image_classification/exportor.py +1 -1
- paddlex/modules/image_classification/model_list.py +2 -1
- paddlex/modules/image_classification/trainer.py +3 -3
- paddlex/modules/image_unwarping/__init__.py +1 -1
- paddlex/modules/image_unwarping/model_list.py +1 -1
- paddlex/modules/instance_segmentation/__init__.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/__init__.py +2 -3
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +9 -5
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +8 -5
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +8 -8
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +7 -4
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +10 -8
- paddlex/modules/instance_segmentation/evaluator.py +2 -2
- paddlex/modules/instance_segmentation/exportor.py +1 -1
- paddlex/modules/instance_segmentation/model_list.py +1 -1
- paddlex/modules/instance_segmentation/trainer.py +1 -1
- paddlex/modules/keypoint_detection/__init__.py +2 -2
- paddlex/modules/keypoint_detection/dataset_checker/__init__.py +2 -2
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +1 -1
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +8 -3
- paddlex/modules/keypoint_detection/evaluator.py +2 -2
- paddlex/modules/keypoint_detection/exportor.py +1 -1
- paddlex/modules/keypoint_detection/model_list.py +1 -1
- paddlex/modules/keypoint_detection/trainer.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/__init__.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/__init__.py +3 -3
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/analyse_dataset.py +8 -8
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/check_dataset.py +1 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/evaluator.py +3 -3
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/exportor.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/model_list.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/trainer.py +5 -7
- paddlex/modules/multilabel_classification/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +10 -7
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +1 -5
- paddlex/modules/multilabel_classification/evaluator.py +3 -3
- paddlex/modules/multilabel_classification/exportor.py +1 -1
- paddlex/modules/multilabel_classification/model_list.py +1 -1
- paddlex/modules/multilabel_classification/trainer.py +3 -3
- paddlex/modules/multilingual_speech_recognition/__init__.py +2 -2
- paddlex/modules/multilingual_speech_recognition/dataset_checker.py +3 -3
- paddlex/modules/multilingual_speech_recognition/evaluator.py +3 -3
- paddlex/modules/multilingual_speech_recognition/exportor.py +3 -3
- paddlex/modules/multilingual_speech_recognition/model_list.py +1 -1
- paddlex/modules/multilingual_speech_recognition/trainer.py +7 -5
- paddlex/modules/object_detection/__init__.py +2 -2
- paddlex/modules/object_detection/dataset_checker/__init__.py +2 -11
- paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +10 -8
- paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +17 -12
- paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +8 -4
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +9 -8
- paddlex/modules/object_detection/evaluator.py +11 -6
- paddlex/modules/object_detection/exportor.py +1 -1
- paddlex/modules/object_detection/model_list.py +3 -1
- paddlex/modules/object_detection/trainer.py +4 -5
- paddlex/modules/open_vocabulary_detection/__init__.py +2 -2
- paddlex/modules/open_vocabulary_detection/dataset_checker.py +3 -3
- paddlex/modules/open_vocabulary_detection/evaluator.py +3 -3
- paddlex/modules/open_vocabulary_detection/exportor.py +3 -3
- paddlex/modules/open_vocabulary_detection/model_list.py +2 -4
- paddlex/modules/open_vocabulary_detection/trainer.py +7 -5
- paddlex/modules/open_vocabulary_segmentation/__init__.py +2 -2
- paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/evaluator.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/exportor.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/model_list.py +1 -1
- paddlex/modules/open_vocabulary_segmentation/trainer.py +7 -5
- paddlex/modules/semantic_segmentation/__init__.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +2 -3
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +6 -3
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +7 -4
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +6 -2
- paddlex/modules/semantic_segmentation/evaluator.py +3 -3
- paddlex/modules/semantic_segmentation/exportor.py +1 -1
- paddlex/modules/semantic_segmentation/model_list.py +1 -1
- paddlex/modules/semantic_segmentation/trainer.py +3 -4
- paddlex/modules/table_recognition/__init__.py +2 -2
- paddlex/modules/table_recognition/dataset_checker/__init__.py +5 -5
- paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +3 -2
- paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +8 -7
- paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +2 -1
- paddlex/modules/table_recognition/evaluator.py +3 -3
- paddlex/modules/table_recognition/exportor.py +1 -1
- paddlex/modules/table_recognition/model_list.py +1 -1
- paddlex/modules/table_recognition/trainer.py +2 -5
- paddlex/modules/text_detection/__init__.py +2 -2
- paddlex/modules/text_detection/dataset_checker/__init__.py +4 -6
- paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +12 -9
- paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +3 -3
- paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +3 -3
- paddlex/modules/text_detection/evaluator.py +3 -3
- paddlex/modules/text_detection/exportor.py +1 -1
- paddlex/modules/text_detection/model_list.py +3 -1
- paddlex/modules/text_detection/trainer.py +2 -5
- paddlex/modules/text_recognition/__init__.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/__init__.py +4 -5
- paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
- paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +2 -5
- paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
- paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
- paddlex/modules/text_recognition/evaluator.py +3 -3
- paddlex/modules/text_recognition/exportor.py +1 -1
- paddlex/modules/text_recognition/model_list.py +3 -1
- paddlex/modules/text_recognition/trainer.py +2 -3
- paddlex/modules/ts_anomaly_detection/__init__.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +1 -9
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_anomaly_detection/evaluator.py +3 -3
- paddlex/modules/ts_anomaly_detection/exportor.py +2 -3
- paddlex/modules/ts_anomaly_detection/model_list.py +1 -1
- paddlex/modules/ts_anomaly_detection/trainer.py +8 -8
- paddlex/modules/ts_classification/__init__.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -5
- paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +5 -5
- paddlex/modules/ts_classification/evaluator.py +3 -3
- paddlex/modules/ts_classification/exportor.py +2 -3
- paddlex/modules/ts_classification/model_list.py +1 -1
- paddlex/modules/ts_classification/trainer.py +7 -7
- paddlex/modules/ts_forecast/__init__.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +1 -9
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_forecast/evaluator.py +3 -3
- paddlex/modules/ts_forecast/exportor.py +2 -3
- paddlex/modules/ts_forecast/model_list.py +1 -1
- paddlex/modules/ts_forecast/trainer.py +7 -7
- paddlex/modules/video_classification/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +9 -9
- paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +2 -3
- paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/video_classification/evaluator.py +3 -3
- paddlex/modules/video_classification/exportor.py +1 -1
- paddlex/modules/video_classification/model_list.py +1 -1
- paddlex/modules/video_classification/trainer.py +3 -3
- paddlex/modules/video_detection/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +3 -5
- paddlex/modules/video_detection/evaluator.py +3 -3
- paddlex/modules/video_detection/exportor.py +1 -1
- paddlex/modules/video_detection/model_list.py +1 -1
- paddlex/modules/video_detection/trainer.py +3 -3
- paddlex/ops/__init__.py +7 -4
- paddlex/ops/iou3d_nms/iou3d_cpu.cpp +8 -6
- paddlex/ops/iou3d_nms/iou3d_cpu.h +3 -2
- paddlex/ops/iou3d_nms/iou3d_nms.cpp +8 -6
- paddlex/ops/iou3d_nms/iou3d_nms.h +6 -4
- paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +24 -18
- paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +9 -7
- paddlex/ops/setup.py +3 -3
- paddlex/ops/voxel/voxelize_op.cc +22 -19
- paddlex/ops/voxel/voxelize_op.cu +25 -25
- paddlex/paddlex_cli.py +104 -87
- paddlex/repo_apis/Paddle3D_api/__init__.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +6 -6
- paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +2 -2
- paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +1 -1
- paddlex/repo_apis/Paddle3D_api/pp3d_config.py +3 -2
- paddlex/repo_apis/PaddleClas_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/__init__.py +3 -3
- paddlex/repo_apis/PaddleClas_api/cls/config.py +5 -4
- paddlex/repo_apis/PaddleClas_api/cls/model.py +4 -4
- paddlex/repo_apis/PaddleClas_api/cls/register.py +12 -3
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +2 -3
- paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +1 -4
- paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +1 -6
- paddlex/repo_apis/PaddleDetection_api/__init__.py +2 -2
- paddlex/repo_apis/PaddleDetection_api/config_helper.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +2 -2
- paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +4 -4
- paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +5 -4
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +6 -7
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +26 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +32 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +2 -3
- paddlex/repo_apis/PaddleNLP_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/__init__.py +4 -3
- paddlex/repo_apis/PaddleOCR_api/config_utils.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +7 -6
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +9 -13
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +29 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +4 -4
- paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/config.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +4 -4
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +20 -3
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +7 -6
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +9 -13
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +20 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +2 -3
- paddlex/repo_apis/PaddleSeg_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +2 -2
- paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/config.py +3 -6
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +6 -6
- paddlex/repo_apis/PaddleSeg_api/seg/register.py +2 -3
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +2 -3
- paddlex/repo_apis/PaddleTS_api/__init__.py +4 -3
- paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +5 -6
- paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_base/config.py +2 -4
- paddlex/repo_apis/PaddleTS_api/ts_base/model.py +4 -4
- paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +4 -5
- paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +6 -7
- paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/config_utils.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +5 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +5 -4
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +5 -5
- paddlex/repo_apis/PaddleVideo_api/video_det/register.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +2 -3
- paddlex/repo_apis/__init__.py +1 -1
- paddlex/repo_apis/base/__init__.py +4 -5
- paddlex/repo_apis/base/config.py +3 -4
- paddlex/repo_apis/base/model.py +11 -19
- paddlex/repo_apis/base/register.py +1 -1
- paddlex/repo_apis/base/runner.py +11 -12
- paddlex/repo_apis/base/utils/__init__.py +1 -1
- paddlex/repo_apis/base/utils/arg.py +1 -1
- paddlex/repo_apis/base/utils/subprocess.py +1 -1
- paddlex/repo_manager/__init__.py +2 -9
- paddlex/repo_manager/core.py +12 -30
- paddlex/repo_manager/meta.py +41 -31
- paddlex/repo_manager/repo.py +171 -161
- paddlex/repo_manager/utils.py +13 -224
- paddlex/utils/__init__.py +1 -1
- paddlex/utils/cache.py +8 -10
- paddlex/utils/config.py +6 -5
- paddlex/utils/{custom_device_whitelist.py → custom_device_list.py} +53 -199
- paddlex/utils/deps.py +249 -0
- paddlex/utils/device.py +87 -36
- paddlex/utils/download.py +4 -4
- paddlex/utils/env.py +37 -7
- paddlex/utils/errors/__init__.py +1 -1
- paddlex/utils/errors/dataset_checker.py +1 -1
- paddlex/utils/errors/others.py +2 -16
- paddlex/utils/file_interface.py +4 -5
- paddlex/utils/flags.py +17 -12
- paddlex/utils/fonts/__init__.py +36 -5
- paddlex/utils/func_register.py +1 -1
- paddlex/utils/install.py +87 -0
- paddlex/utils/interactive_get_pipeline.py +3 -3
- paddlex/utils/lazy_loader.py +3 -3
- paddlex/utils/logging.py +10 -1
- paddlex/utils/misc.py +6 -6
- paddlex/utils/pipeline_arguments.py +15 -7
- paddlex/utils/result_saver.py +4 -5
- paddlex/utils/subclass_register.py +2 -4
- paddlex/version.py +2 -1
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/METADATA +237 -102
- paddlex-3.0.1.dist-info/RECORD +1095 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
- paddlex/inference/models/base/predictor/basic_predictor.py +0 -139
- paddlex/paddle2onnx_requirements.txt +0 -1
- paddlex/repo_manager/requirements.txt +0 -21
- paddlex/serving_requirements.txt +0 -9
- paddlex-3.0.0rc0.dist-info/RECORD +0 -1015
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info/licenses}/LICENSE +0 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -14,14 +14,15 @@
|
|
14
14
|
from __future__ import annotations
|
15
15
|
|
16
16
|
import copy
|
17
|
-
|
18
|
-
from PIL import Image, ImageDraw
|
19
|
-
|
17
|
+
import math
|
20
18
|
import re
|
19
|
+
from functools import partial
|
20
|
+
from typing import List
|
21
|
+
|
21
22
|
import numpy as np
|
22
|
-
from PIL import Image
|
23
|
-
from PIL import ImageDraw
|
23
|
+
from PIL import Image, ImageDraw, ImageFont
|
24
24
|
|
25
|
+
from ....utils.fonts import PINGFANG_FONT_FILE_PATH
|
25
26
|
from ...common.result import (
|
26
27
|
BaseCVResult,
|
27
28
|
HtmlMixin,
|
@@ -29,8 +30,166 @@ from ...common.result import (
|
|
29
30
|
MarkdownMixin,
|
30
31
|
XlsxMixin,
|
31
32
|
)
|
32
|
-
from .
|
33
|
-
|
33
|
+
from .setting import BLOCK_LABEL_MAP
|
34
|
+
|
35
|
+
|
36
|
+
def compile_title_pattern():
    """
    Build the regex that splits a heading into numbering and title text.

    The compiled pattern exposes three groups: (1) the leading numbering,
    (2) the whitespace that follows it, (3) the remainder of the line.
    Recognised numbering forms are dotted decimals ("1", "1.2", "3.1.4."),
    parenthesised decimal or Chinese numerals ("(3)", "(三)"), bare Chinese
    numerals ("一、") and the Roman numerals I through X.

    :return: Compiled ``re.Pattern`` anchored at the start of the string.
    """
    chinese_numerals = r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾"
    alternatives = [
        # Dotted decimal numbering with an optional trailing separator.
        r"[1-9][0-9]*(?:\.[1-9][0-9]*)*[\.、]?",
        # Parenthesised decimal or Chinese numerals.
        # NOTE(review): each bracket class lists the ASCII parenthesis twice;
        # one of them may have been meant as the full-width form - confirm.
        r"[\(\(](?:[1-9][0-9]*|[" + chinese_numerals + r"]+)[\)\)]",
        # Bare Chinese numerals with an optional trailing separator.
        r"[" + chinese_numerals + r"]+[、\.]?",
        # Roman numerals I-X. The negative lookahead stops a numeral from
        # being carved out of an ordinary word ("Introduction" -> "I") or out
        # of a longer numeral ("III." -> "I"), which happened because the
        # trailing ``(.*)`` group happily swallowed whatever was left over.
        r"(?:I|II|III|IV|V|VI|VII|VIII|IX|X)(?![A-Za-z0-9])\.?",
    ]
    numbering_pattern = r"(?:" + "|".join(alternatives) + r")"
    return re.compile(r"^\s*(" + numbering_pattern + r")(\s*)(.*)$")


# Compiled once at import time and shared by every title-formatting call.
TITLE_RE_PATTERN = compile_title_pattern()
|
48
|
+
|
49
|
+
|
50
|
+
def format_title_func(block):
    """
    Render a title block as a single-line Markdown heading.

    When the content starts with a numbering recognised by
    ``TITLE_RE_PATTERN``, the numbering and the title text are re-joined with
    exactly one space. Trailing dots are stripped, and the heading depth is
    one more than the number of dots left anywhere in the title, so "1.2 Foo"
    is rendered one level deeper than "1 Foo". A title without numbering is
    still prefixed with '#' characters.

    :param block: Title block; only its ``content`` string is read.
    :return: Markdown heading with hyphenated line breaks joined and the
        remaining newlines turned into spaces.
    """
    heading = block.content
    parsed = TITLE_RE_PATTERN.match(heading)
    if parsed is not None:
        # Normalise to "<numbering> <text>" with a single separating space.
        heading = parsed.group(1).strip() + " " + parsed.group(3).lstrip()

    heading = heading.rstrip(".")
    # count() is 0 when there is no dot, which yields the base depth of 1.
    depth = heading.count(".") + 1
    hashes = "#" * (depth + 1)
    markdown = f"{hashes} {heading}"
    return markdown.replace("-\n", "").replace("\n", " ")
|
81
|
+
|
82
|
+
|
83
|
+
def format_centered_by_html(string):
    """
    Wrap text in a centre-aligned HTML ``<div>`` on a single line.

    Hyphenated line breaks ("-\\n") are joined and the remaining newlines
    become spaces; one newline is appended after the closing tag.

    :param string: Text to centre.
    :return: The HTML snippet terminated by a newline.
    """
    html = f'<div style="text-align: center;">{string}</div>'
    joined = html.replace("-\n", "")
    single_line = joined.replace("\n", " ")
    return single_line + "\n"
|
91
|
+
|
92
|
+
|
93
|
+
def format_text_plain_func(block):
    """
    Return the block's text exactly as stored.

    :param block: Block whose ``content`` attribute is read.
    :return: ``block.content`` without any modification.
    """
    plain_text = block.content
    return plain_text
|
95
|
+
|
96
|
+
|
97
|
+
def format_image_scaled_by_html_func(block, original_image_width):
    """
    Build an HTML ``<img>`` tag whose width is a percentage of the page width.

    :param block: Block whose ``image`` mapping provides ``"path"`` (used as
        the ``src``) and ``"img"`` (an object with a ``width`` attribute).
    :param original_image_width: Width the image width is compared against.
    :return: The ``<img>`` tag; the percentage is truncated to an integer.
    """
    src = block.image["path"]
    cropped_width = block.image["img"].width
    percent = int(cropped_width / original_image_width * 100)
    # Keep the path on one line: join hyphenated breaks, then flatten newlines.
    src = src.replace("-\n", "").replace("\n", " ")
    return f'<img src="{src}" alt="Image" width="{percent}%" />'
|
108
|
+
|
109
|
+
|
110
|
+
def format_image_plain_func(block):
    """
    Build a Markdown image reference for the block's image.

    :param block: Block whose ``image`` mapping provides ``"path"``.
    :return: ``![](<path>)`` with hyphenated line breaks joined and the
        remaining newlines in the path replaced by spaces.
    """
    image_path = block.image["path"].replace("-\n", "").replace("\n", " ")
    # The template must contain a placeholder; formatting an empty string
    # always yields "" and silently drops the image from the output.
    return "![]({})".format(image_path)
|
115
|
+
|
116
|
+
|
117
|
+
def format_chart2table_func(block):
    """
    Convert pipe-separated chart text into a Markdown table.

    The first line of ``block.content`` is treated as the header. A ``---``
    separator row with one cell per header column is inserted after it, and
    every row is wrapped in leading and trailing ``|``.

    :param block: Block whose ``content`` holds newline-separated rows with
        ``|`` between cells.
    :return: The Markdown table as a single string.
    """
    header, *data_rows = block.content.split("\n")
    separator = "|".join("---" for _ in header.split("|"))
    table_rows = (header, separator, *data_rows)
    return "\n".join(f"|{row}|" for row in table_rows)
|
123
|
+
|
124
|
+
|
125
|
+
def simplify_table_func(table_code):
    """
    Strip the ``<html>``/``<body>`` wrapper tags from a table's HTML.

    :param table_code: HTML string of a table.
    :return: The HTML without the wrapper tags, prefixed with a newline.
    """
    stripped = table_code
    for wrapper_tag in ("<html>", "</html>", "<body>", "</body>"):
        stripped = stripped.replace(wrapper_tag, "")
    return "\n" + stripped
|
129
|
+
|
130
|
+
|
131
|
+
def format_first_line_func(block, templates, format_func, spliter):
    """
    Reformat the first non-blank piece of a block if it matches a template.

    ``block.content`` is split on ``spliter``. Only the first piece that is
    not blank is examined: when its lower-cased text is in ``templates`` it is
    replaced by ``format_func(piece)``. All other pieces are left untouched.

    :param block: Block whose ``content`` string is processed.
    :param templates: Container of lower-case strings to match against.
    :param format_func: Callable applied to the matching piece.
    :param spliter: Separator used both to split and to re-join the content.
    :return: The re-joined content.
    """
    pieces = block.content.split(spliter)
    first_filled = next(
        (pos for pos, piece in enumerate(pieces) if piece.strip() != ""),
        None,
    )
    if first_filled is not None:
        candidate = pieces[first_filled]
        if candidate.lower() in templates:
            pieces[first_filled] = format_func(candidate)
    return spliter.join(pieces)
|
141
|
+
|
142
|
+
|
143
|
+
def get_seg_flag(block: LayoutParsingBlock, prev_block: LayoutParsingBlock):
    """
    Decide whether a block opens and/or closes a text segment.

    Both flags default to True and are cleared when the text lies within 10
    coordinate units of the relevant edge. When ``prev_block`` horizontally
    overlaps ``block``, the edges used are those of the union of both boxes.
    # NOTE(review): presumably seg_start/seg_end_coordinate are the x
    # positions where the first line starts and the last line ends - confirm.

    :param block: Current block; reads ``bbox``, ``seg_start_coordinate``,
        ``seg_end_coordinate`` and ``width``.
    :param prev_block: Preceding block or ``None``; reads ``bbox``,
        ``num_of_lines``, ``seg_end_coordinate`` and ``width``.
    :return: Tuple ``(seg_start_flag, seg_end_flag)``.
    """
    near = 10  # distance below which text counts as touching an edge

    box = block.bbox
    left_edge, right_edge = box[0], box[2]
    text_start = block.seg_start_coordinate
    text_end = block.seg_end_coordinate

    if prev_block is None:
        # No predecessor: a segment starts only if the first line is indented.
        seg_start_flag = not (text_start - left_edge < near)
    else:
        prev_box = prev_block.bbox
        prev_has_several_lines = prev_block.num_of_lines > 1
        prev_text_end = prev_block.seg_end_coordinate

        if left_edge < prev_box[2]:
            # Overlapping blocks: measure against the union of both boxes.
            left_edge = min(prev_box[0], left_edge)
            right_edge = max(prev_box[2], right_edge)
            prev_ends_at_edge = abs(right_edge - prev_text_end) < near
            gap = 0
        else:
            prev_ends_at_edge = abs(prev_box[2] - prev_text_end) < near
            gap = abs(box[0] - prev_box[2])

        starts_at_edge = text_start - left_edge < near

        # The block continues the previous paragraph when the previous text
        # runs to the edge, this text starts at the edge, the previous block
        # has several lines and the blocks are not too far apart.
        continues_previous = (
            prev_ends_at_edge
            and starts_at_edge
            and prev_has_several_lines
            and gap < max(prev_block.width, block.width)
        )
        seg_start_flag = not continues_previous

    seg_end_flag = not (right_edge - text_end < near)
    return seg_start_flag, seg_end_flag
|
34
193
|
|
35
194
|
|
36
195
|
class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
@@ -43,30 +202,10 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
43
202
|
XlsxMixin.__init__(self)
|
44
203
|
MarkdownMixin.__init__(self)
|
45
204
|
JsonMixin.__init__(self)
|
46
|
-
self.title_pattern = self._build_title_pattern()
|
47
|
-
|
48
|
-
def _build_title_pattern(self):
|
49
|
-
# Precompiled regex pattern for matching numbering at the beginning of the title
|
50
|
-
numbering_pattern = (
|
51
|
-
r"(?:"
|
52
|
-
+ r"[1-9][0-9]*(?:\.[1-9][0-9]*)*[\.、]?|"
|
53
|
-
+ r"[\(\(](?:[1-9][0-9]*|["
|
54
|
-
r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+)[\)\)]|" + r"["
|
55
|
-
r"一二三四五六七八九十百千万亿零壹贰叁肆伍陆柒捌玖拾]+"
|
56
|
-
r"[、\.]?|" + r"(?:I|II|III|IV|V|VI|VII|VIII|IX|X)\.?" + r")"
|
57
|
-
)
|
58
|
-
return re.compile(r"^\s*(" + numbering_pattern + r")(\s*)(.*)$")
|
59
|
-
|
60
|
-
def _get_input_fn(self):
|
61
|
-
fn = super()._get_input_fn()
|
62
|
-
if (page_idx := self["page_index"]) is not None:
|
63
|
-
fp = Path(fn)
|
64
|
-
stem, suffix = fp.stem, fp.suffix
|
65
|
-
return f"{stem}_{page_idx}{suffix}"
|
66
|
-
else:
|
67
|
-
return fn
|
68
205
|
|
69
206
|
def _to_img(self) -> dict[str, np.ndarray]:
|
207
|
+
from .utils import get_show_color
|
208
|
+
|
70
209
|
res_img_dict = {}
|
71
210
|
model_settings = self["model_settings"]
|
72
211
|
if model_settings["use_doc_preprocessor"]:
|
@@ -74,12 +213,14 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
74
213
|
res_img_dict[key] = value
|
75
214
|
res_img_dict["layout_det_res"] = self["layout_det_res"].img["res"]
|
76
215
|
|
77
|
-
if model_settings["
|
78
|
-
res_img_dict["
|
216
|
+
if model_settings["use_region_detection"]:
|
217
|
+
res_img_dict["region_det_res"] = self["region_det_res"].img["res"]
|
218
|
+
|
219
|
+
res_img_dict["overall_ocr_res"] = self["overall_ocr_res"].img["ocr_res_img"]
|
79
220
|
|
80
221
|
if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
|
81
222
|
table_cell_img = Image.fromarray(
|
82
|
-
copy.deepcopy(self["doc_preprocessor_res"]["output_img"])
|
223
|
+
copy.deepcopy(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
|
83
224
|
)
|
84
225
|
table_draw = ImageDraw.Draw(table_cell_img)
|
85
226
|
rectangle_color = (255, 0, 0)
|
@@ -104,16 +245,23 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
104
245
|
# for layout ordering image
|
105
246
|
image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
|
106
247
|
draw = ImageDraw.Draw(image, "RGBA")
|
107
|
-
|
248
|
+
font_size = int(0.018 * int(image.width)) + 2
|
249
|
+
font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
|
250
|
+
parsing_result: List[LayoutParsingBlock] = self["parsing_res_list"]
|
108
251
|
for block in parsing_result:
|
109
|
-
bbox = block
|
110
|
-
index = block.
|
111
|
-
label = block
|
112
|
-
fill_color = get_show_color(label)
|
252
|
+
bbox = block.bbox
|
253
|
+
index = block.order_index
|
254
|
+
label = block.label
|
255
|
+
fill_color = get_show_color(label, False)
|
113
256
|
draw.rectangle(bbox, fill=fill_color)
|
114
257
|
if index is not None:
|
115
|
-
text_position = (bbox[2] + 2, bbox[1] -
|
116
|
-
|
258
|
+
text_position = (bbox[2] + 2, bbox[1] - font_size // 2)
|
259
|
+
if int(image.width) - bbox[2] < font_size:
|
260
|
+
text_position = (
|
261
|
+
int(bbox[2] - font_size * 1.1),
|
262
|
+
bbox[1] - font_size // 2,
|
263
|
+
)
|
264
|
+
draw.text(text_position, str(index), font=font, fill="red")
|
117
265
|
|
118
266
|
res_img_dict["layout_order_res"] = image
|
119
267
|
|
@@ -137,8 +285,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
137
285
|
if self["model_settings"]["use_doc_preprocessor"]:
|
138
286
|
data["doc_preprocessor_res"] = self["doc_preprocessor_res"].str["res"]
|
139
287
|
data["layout_det_res"] = self["layout_det_res"].str["res"]
|
140
|
-
|
141
|
-
data["overall_ocr_res"] = self["overall_ocr_res"].str["res"]
|
288
|
+
data["overall_ocr_res"] = self["overall_ocr_res"].str["res"]
|
142
289
|
if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
|
143
290
|
data["table_res_list"] = []
|
144
291
|
for sno in range(len(self["table_res_list"])):
|
@@ -179,9 +326,9 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
179
326
|
parsing_res_list = self["parsing_res_list"]
|
180
327
|
parsing_res_list = [
|
181
328
|
{
|
182
|
-
"block_label": parsing_res
|
183
|
-
"block_content": parsing_res
|
184
|
-
"block_bbox": parsing_res
|
329
|
+
"block_label": parsing_res.label,
|
330
|
+
"block_content": parsing_res.content,
|
331
|
+
"block_bbox": parsing_res.bbox,
|
185
332
|
}
|
186
333
|
for parsing_res in parsing_res_list
|
187
334
|
]
|
@@ -189,8 +336,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
189
336
|
if self["model_settings"]["use_doc_preprocessor"]:
|
190
337
|
data["doc_preprocessor_res"] = self["doc_preprocessor_res"].json["res"]
|
191
338
|
data["layout_det_res"] = self["layout_det_res"].json["res"]
|
192
|
-
|
193
|
-
data["overall_ocr_res"] = self["overall_ocr_res"].json["res"]
|
339
|
+
data["overall_ocr_res"] = self["overall_ocr_res"].json["res"]
|
194
340
|
if model_settings["use_table_recognition"] and len(self["table_res_list"]) > 0:
|
195
341
|
data["table_res_list"] = []
|
196
342
|
for sno in range(len(self["table_res_list"])):
|
@@ -243,228 +389,357 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
243
389
|
res_xlsx_dict[key] = table_res.xlsx["pred"]
|
244
390
|
return res_xlsx_dict
|
245
391
|
|
246
|
-
def _to_markdown(self) -> dict:
|
392
|
+
def _to_markdown(self, pretty=True) -> dict:
|
247
393
|
"""
|
248
394
|
Save the parsing result to a Markdown file.
|
249
395
|
|
396
|
+
Args:
|
397
|
+
pretty (Optional[bool]): whether to pretty markdown by HTML, default by True.
|
398
|
+
|
250
399
|
Returns:
|
251
400
|
Dict
|
252
401
|
"""
|
402
|
+
original_image_width = self["doc_preprocessor_res"]["output_img"].shape[1]
|
253
403
|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
:param title: Original chapter title string.
|
264
|
-
:return: Normalized chapter title string.
|
265
|
-
"""
|
266
|
-
match = self.title_pattern.match(title)
|
267
|
-
if match:
|
268
|
-
numbering = match.group(1).strip()
|
269
|
-
title_content = match.group(3).lstrip()
|
270
|
-
# Return numbering and title content separated by one space
|
271
|
-
title = numbering + " " + title_content
|
272
|
-
|
273
|
-
title = title.rstrip(".")
|
274
|
-
level = (
|
275
|
-
title.count(
|
276
|
-
".",
|
277
|
-
)
|
278
|
-
+ 1
|
279
|
-
if "." in title
|
280
|
-
else 1
|
281
|
-
)
|
282
|
-
return f"#{'#' * level} {title}".replace("-\n", "").replace(
|
283
|
-
"\n",
|
284
|
-
" ",
|
285
|
-
)
|
286
|
-
|
287
|
-
def format_centered_text(key):
|
288
|
-
return (
|
289
|
-
f'<div style="text-align: center;">{block[key]}</div>'.replace(
|
290
|
-
"-\n",
|
291
|
-
"",
|
292
|
-
).replace("\n", " ")
|
293
|
-
+ "\n"
|
404
|
+
if pretty:
|
405
|
+
format_text_func = lambda block: format_centered_by_html(
|
406
|
+
format_text_plain_func(block)
|
407
|
+
)
|
408
|
+
format_image_func = lambda block: format_centered_by_html(
|
409
|
+
format_image_scaled_by_html_func(
|
410
|
+
block,
|
411
|
+
original_image_width=original_image_width,
|
294
412
|
)
|
413
|
+
)
|
414
|
+
else:
|
415
|
+
format_text_func = lambda block: block.content
|
416
|
+
format_image_func = format_image_plain_func
|
295
417
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
'<div style="text-align: center;"><img src="{}" alt="Image" /></div>'.format(
|
301
|
-
image_path.replace("-\n", "").replace("\n", " "),
|
302
|
-
),
|
303
|
-
)
|
304
|
-
return "\n".join(img_tags)
|
305
|
-
|
306
|
-
def format_first_line(templates, format_func, spliter):
|
307
|
-
lines = block["block_content"].split(spliter)
|
308
|
-
for idx in range(len(lines)):
|
309
|
-
line = lines[idx]
|
310
|
-
if line.strip() == "":
|
311
|
-
continue
|
312
|
-
if line.lower() in templates:
|
313
|
-
lines[idx] = format_func(line)
|
314
|
-
break
|
315
|
-
return spliter.join(lines)
|
316
|
-
|
317
|
-
def format_table():
|
318
|
-
return "\n" + block["block_content"]
|
319
|
-
|
320
|
-
def get_seg_flag(block, prev_block):
|
321
|
-
|
322
|
-
seg_start_flag = True
|
323
|
-
seg_end_flag = True
|
324
|
-
|
325
|
-
block_box = block["block_bbox"]
|
326
|
-
context_left_coordinate = block_box[0]
|
327
|
-
context_right_coordinate = block_box[2]
|
328
|
-
seg_start_coordinate = block.get("seg_start_coordinate")
|
329
|
-
seg_end_coordinate = block.get("seg_end_coordinate")
|
330
|
-
|
331
|
-
if prev_block is not None:
|
332
|
-
prev_block_bbox = prev_block["block_bbox"]
|
333
|
-
num_of_prev_lines = prev_block.get("num_of_lines")
|
334
|
-
pre_block_seg_end_coordinate = prev_block.get("seg_end_coordinate")
|
335
|
-
prev_end_space_small = (
|
336
|
-
prev_block_bbox[2] - pre_block_seg_end_coordinate < 10
|
337
|
-
)
|
338
|
-
prev_lines_more_than_one = num_of_prev_lines > 1
|
339
|
-
|
340
|
-
overlap_blocks = context_left_coordinate < prev_block_bbox[2]
|
341
|
-
|
342
|
-
# update context_left_coordinate and context_right_coordinate
|
343
|
-
if overlap_blocks:
|
344
|
-
context_left_coordinate = min(
|
345
|
-
prev_block_bbox[0], context_left_coordinate
|
346
|
-
)
|
347
|
-
context_right_coordinate = max(
|
348
|
-
prev_block_bbox[2], context_right_coordinate
|
349
|
-
)
|
350
|
-
prev_end_space_small = (
|
351
|
-
context_right_coordinate - pre_block_seg_end_coordinate < 10
|
352
|
-
)
|
353
|
-
|
354
|
-
current_start_space_small = (
|
355
|
-
seg_start_coordinate - context_left_coordinate < 10
|
356
|
-
)
|
418
|
+
if self["model_settings"].get("use_chart_recognition", False):
|
419
|
+
format_chart_func = format_chart2table_func
|
420
|
+
else:
|
421
|
+
format_chart_func = format_image_func
|
357
422
|
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
and prev_lines_more_than_one
|
362
|
-
):
|
363
|
-
seg_start_flag = False
|
364
|
-
else:
|
365
|
-
if seg_start_coordinate - context_left_coordinate < 10:
|
366
|
-
seg_start_flag = False
|
367
|
-
|
368
|
-
if context_right_coordinate - seg_end_coordinate < 10:
|
369
|
-
seg_end_flag = False
|
370
|
-
|
371
|
-
return seg_start_flag, seg_end_flag
|
372
|
-
|
373
|
-
handlers = {
|
374
|
-
"paragraph_title": lambda: format_title(block["block_content"]),
|
375
|
-
"doc_title": lambda: f"# {block['block_content']}".replace(
|
376
|
-
"-\n",
|
377
|
-
"",
|
378
|
-
).replace("\n", " "),
|
379
|
-
"table_title": lambda: format_centered_text("block_content"),
|
380
|
-
"figure_title": lambda: format_centered_text("block_content"),
|
381
|
-
"chart_title": lambda: format_centered_text("block_content"),
|
382
|
-
"text": lambda: block["block_content"]
|
383
|
-
.replace("-\n", " ")
|
384
|
-
.replace("\n", " "),
|
385
|
-
"abstract": lambda: format_first_line(
|
386
|
-
["摘要", "abstract"], lambda l: f"## {l}\n", " "
|
387
|
-
),
|
388
|
-
"content": lambda: block["block_content"]
|
389
|
-
.replace("-\n", " \n")
|
390
|
-
.replace("\n", " \n"),
|
391
|
-
"image": lambda: format_image("block_image"),
|
392
|
-
"chart": lambda: format_image("block_image"),
|
393
|
-
"formula": lambda: f"$${block['block_content']}$$",
|
394
|
-
"table": format_table,
|
395
|
-
"reference": lambda: format_first_line(
|
396
|
-
["参考文献", "references"], lambda l: f"## {l}", "\n"
|
397
|
-
),
|
398
|
-
"algorithm": lambda: block["block_content"].strip("\n"),
|
399
|
-
"seal": lambda: f"Words of Seals:\n{block['block_content']}",
|
400
|
-
}
|
401
|
-
parsing_res_list = obj["parsing_res_list"]
|
402
|
-
markdown_content = ""
|
403
|
-
last_label = None
|
404
|
-
seg_start_flag = None
|
405
|
-
seg_end_flag = None
|
406
|
-
prev_block = None
|
407
|
-
page_first_element_seg_start_flag = None
|
408
|
-
page_last_element_seg_end_flag = None
|
409
|
-
parsing_res_list = sorted(
|
410
|
-
parsing_res_list,
|
411
|
-
key=lambda x: x.get("sub_index", 999),
|
423
|
+
if self["model_settings"].get("use_seal_recognition", False):
|
424
|
+
format_seal_func = lambda block: "\n".join(
|
425
|
+
[format_image_func(block), format_text_func(block)]
|
412
426
|
)
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
427
|
+
else:
|
428
|
+
format_seal_func = format_image_func
|
429
|
+
|
430
|
+
if self["model_settings"].get("use_table_recognition", False):
|
431
|
+
if pretty:
|
432
|
+
format_table_func = lambda block: "\n" + format_text_func(
|
433
|
+
block
|
434
|
+
).replace("<table>", '<table border="1">')
|
435
|
+
else:
|
436
|
+
format_table_func = lambda block: simplify_table_func(
|
437
|
+
"\n" + block.content
|
421
438
|
)
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
439
|
+
else:
|
440
|
+
format_table_func = format_image_func
|
441
|
+
|
442
|
+
if self["model_settings"].get("use_formula_recognition", False):
|
443
|
+
format_formula_func = lambda block: f"$${block.content}$$"
|
444
|
+
else:
|
445
|
+
format_formula_func = format_image_func
|
446
|
+
|
447
|
+
handle_funcs_dict = {
|
448
|
+
"paragraph_title": format_title_func,
|
449
|
+
"abstract_title": format_title_func,
|
450
|
+
"reference_title": format_title_func,
|
451
|
+
"content_title": format_title_func,
|
452
|
+
"doc_title": lambda block: f"# {block.content}".replace(
|
453
|
+
"-\n",
|
454
|
+
"",
|
455
|
+
).replace("\n", " "),
|
456
|
+
"table_title": format_text_func,
|
457
|
+
"figure_title": format_text_func,
|
458
|
+
"chart_title": format_text_func,
|
459
|
+
"text": lambda block: block.content.replace("\n\n", "\n").replace(
|
460
|
+
"\n", "\n\n"
|
461
|
+
),
|
462
|
+
"abstract": partial(
|
463
|
+
format_first_line_func,
|
464
|
+
templates=["摘要", "abstract"],
|
465
|
+
format_func=lambda l: f"## {l}\n",
|
466
|
+
spliter=" ",
|
467
|
+
),
|
468
|
+
"content": lambda block: block.content.replace("-\n", " \n").replace(
|
469
|
+
"\n", " \n"
|
470
|
+
),
|
471
|
+
"image": format_image_func,
|
472
|
+
"chart": format_chart_func,
|
473
|
+
"formula": format_formula_func,
|
474
|
+
"table": format_table_func,
|
475
|
+
"reference": partial(
|
476
|
+
format_first_line_func,
|
477
|
+
templates=["参考文献", "references"],
|
478
|
+
format_func=lambda l: f"## {l}",
|
479
|
+
spliter="\n",
|
480
|
+
),
|
481
|
+
"algorithm": lambda block: block.content.strip("\n"),
|
482
|
+
"seal": format_seal_func,
|
483
|
+
}
|
484
|
+
|
485
|
+
markdown_content = ""
|
486
|
+
last_label = None
|
487
|
+
seg_start_flag = None
|
488
|
+
seg_end_flag = None
|
489
|
+
prev_block = None
|
490
|
+
page_first_element_seg_start_flag = None
|
491
|
+
page_last_element_seg_end_flag = None
|
492
|
+
markdown_info = {}
|
493
|
+
markdown_info["markdown_images"] = {}
|
494
|
+
for block in self["parsing_res_list"]:
|
495
|
+
seg_start_flag, seg_end_flag = get_seg_flag(block, prev_block)
|
496
|
+
|
497
|
+
label = block.label
|
498
|
+
if block.image is not None:
|
499
|
+
markdown_info["markdown_images"][block.image["path"]] = block.image[
|
500
|
+
"img"
|
501
|
+
]
|
502
|
+
page_first_element_seg_start_flag = (
|
503
|
+
seg_start_flag
|
504
|
+
if (page_first_element_seg_start_flag is None)
|
505
|
+
else page_first_element_seg_start_flag
|
454
506
|
)
|
455
507
|
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
508
|
+
handle_func = handle_funcs_dict.get(label, None)
|
509
|
+
if handle_func:
|
510
|
+
prev_block = block
|
511
|
+
if label == last_label == "text" and seg_start_flag == False:
|
512
|
+
markdown_content += handle_func(block)
|
513
|
+
else:
|
514
|
+
markdown_content += (
|
515
|
+
"\n\n" + handle_func(block)
|
516
|
+
if markdown_content
|
517
|
+
else handle_func(block)
|
518
|
+
)
|
519
|
+
last_label = label
|
520
|
+
page_last_element_seg_end_flag = seg_end_flag
|
521
|
+
|
522
|
+
markdown_info["markdown_texts"] = markdown_content
|
461
523
|
markdown_info["page_continuation_flags"] = (
|
462
524
|
page_first_element_seg_start_flag,
|
463
525
|
page_last_element_seg_end_flag,
|
464
526
|
)
|
465
|
-
|
466
|
-
markdown_info["markdown_images"] = {}
|
467
527
|
for img in self["imgs_in_doc"]:
|
468
528
|
markdown_info["markdown_images"][img["path"]] = img["img"]
|
469
529
|
|
470
530
|
return markdown_info
|
531
|
+
|
532
|
+
|
533
|
+
class LayoutParsingBlock:
    """One layout block on a page: a label, a bounding box and its content.

    Besides the raw fields, the block keeps geometry derived from its
    bounding box (width, height, area, direction and the start/end
    coordinates along the main and secondary directions). It can also
    temporarily absorb child blocks, which grows its bounding box to the
    union of all members.
    """

    def __init__(self, label, bbox, content="") -> None:
        """Create a block.

        Args:
            label: Layout label of the block.
            bbox: Box as ``[x_min, y_min, x_max, y_max]``; the stored
                ``self.bbox`` is this box with every value passed to ``int``.
            content: Content attached to the block, empty by default.
        """
        self.label = label
        self.order_label = None
        self.bbox = list(map(int, bbox))
        self.content = content
        self.seg_start_coordinate = float("inf")
        self.seg_end_coordinate = float("-inf")
        # Width/height use the caller's values as given, not the
        # int-converted ``self.bbox``.
        self.width = bbox[2] - bbox[0]
        self.height = bbox[3] - bbox[1]
        self.area = self.width * self.height
        self.num_of_lines = 1
        self.image = None
        self.index = None
        self.order_index = None
        self.text_line_width = 1
        self.text_line_height = 1
        self.direction = self.get_bbox_direction()
        self.child_blocks = []
        self.update_direction_info()

    def __str__(self) -> str:
        return f"{self.__dict__}"

    def __repr__(self) -> str:
        _str = f"\n\n#################\nindex:\t{self.index}\nlabel:\t{self.label}\nregion_label:\t{self.order_label}\nbbox:\t{self.bbox}\ncontent:\t{self.content}\n#################"
        return _str

    def to_dict(self) -> dict:
        """Return the instance attribute dict (the live ``__dict__``, not a copy)."""
        return self.__dict__

    def update_direction_info(self) -> None:
        """Recompute the direction-dependent fields from ``direction`` and ``bbox``."""
        if self.direction == "horizontal":
            self.secondary_direction = "vertical"
            self.short_side_length = self.height
            self.long_side_length = self.width
            main_start, main_end, cross_start, cross_end = 0, 2, 1, 3
        else:
            self.secondary_direction = "horizontal"
            self.short_side_length = self.width
            self.long_side_length = self.height
            main_start, main_end, cross_start, cross_end = 1, 3, 0, 2
        self.start_coordinate = self.bbox[main_start]
        self.end_coordinate = self.bbox[main_end]
        self.secondary_direction_start_coordinate = self.bbox[cross_start]
        self.secondary_direction_end_coordinate = self.bbox[cross_end]

    def append_child_block(self, child_block: "LayoutParsingBlock") -> None:
        """Absorb ``child_block`` (and its own children) into this block.

        The bounding box grows to the union of both boxes. The box held
        before the first child was added is remembered in ``ori_bbox`` so
        that ``get_child_blocks`` can restore it.
        """
        if not self.child_blocks:
            self.ori_bbox = self.bbox.copy()
        x1, y1, x2, y2 = self.bbox
        x1_child, y1_child, x2_child, y2_child = child_block.bbox
        # Stored as a list so ``bbox`` keeps the type set by ``__init__``.
        self.bbox = [
            min(x1, x1_child),
            min(y1, y1_child),
            max(x2, x2_child),
            max(y2, y2_child),
        ]
        self.update_direction_info()
        child_blocks = [child_block]
        if child_block.child_blocks:
            # Flatten: the child's children are taken over by this block.
            child_blocks.extend(child_block.get_child_blocks())
        self.child_blocks.extend(child_blocks)

    def get_child_blocks(self) -> list:
        """Detach and return all child blocks, restoring the original bbox.

        Calling this on a block that never had children returns an empty
        list and leaves ``bbox`` untouched.
        """
        ori_bbox = getattr(self, "ori_bbox", None)
        if ori_bbox is not None:
            # NOTE(review): the direction-dependent coordinates are not
            # recomputed here, so they still describe the union box --
            # confirm whether callers rely on that.
            self.bbox = ori_bbox
        child_blocks = self.child_blocks.copy()
        self.child_blocks = []
        return child_blocks

    def get_centroid(self) -> tuple:
        """Return the centre of ``bbox`` as ``(x, y)``."""
        x1, y1, x2, y2 = self.bbox
        return ((x1 + x2) / 2, (y1 + y2) / 2)

    def get_bbox_direction(self, direction_ratio: float = 1.0) -> str:
        """
        Determine if the block is horizontal or vertical.

        Args:
            direction_ratio (float): Factor applied to the width before it is
                compared with the height. Default is 1.0.

        Returns:
            str: "horizontal" if ``width * direction_ratio >= height``,
                otherwise "vertical".
        """
        if self.width * direction_ratio >= self.height:
            return "horizontal"
        return "vertical"
|
626
|
+
|
627
|
+
|
628
|
+
class LayoutParsingRegion:
    """A region of a page that groups layout blocks by category.

    On construction the region computes distance metrics from its bounding
    box, sorts the indices of its blocks into per-category lists, derives
    its dominant text direction from the uncategorized ("normal text")
    blocks, and records which bbox indices belong to the main and secondary
    directions.
    """

    def __init__(
        self, bbox, blocks: "List[LayoutParsingBlock] | None" = None, image_shape=None
    ) -> None:
        """Create a region.

        Args:
            bbox: Region box as ``[x1, y1, x2, y2]``.
            blocks: Blocks belonging to the region; ``None`` means no blocks.
            image_shape: ``(height, width)`` pair of the page image. Required
                in practice: ``calculate_bbox_metrics`` raises ``TypeError``
                when it is ``None``.
        """
        self.bbox = bbox
        self.block_map = {}
        self.direction = "horizontal"
        self.calculate_bbox_metrics(image_shape)
        self.doc_title_block_idxes = []
        self.paragraph_title_block_idxes = []
        self.vision_block_idxes = []
        self.unordered_block_idxes = []
        self.vision_title_block_idxes = []
        self.normal_text_block_idxes = []
        self.header_block_idxes = []
        self.footer_block_idxes = []
        self.text_line_width = 20
        self.text_line_height = 10
        # A fresh list per call avoids sharing one default list object
        # between all instances.
        self.init_region_info_from_layout([] if blocks is None else blocks)
        self.init_direction_info()

    def init_region_info_from_layout(self, blocks: "List[LayoutParsingBlock]"):
        """Index ``blocks`` by category and derive direction and line metrics.

        Every block is stored in ``block_map`` under its position and gets
        that position written to ``block.index``. The first category of
        ``BLOCK_LABEL_MAP`` containing the block's label (checked in the
        order below) receives the index; blocks matching none are treated
        as normal text.
        """
        # Order matters: it mirrors the precedence of the category checks.
        categories = (
            ("header_labels", self.header_block_idxes),
            ("doc_title_labels", self.doc_title_block_idxes),
            ("paragraph_title_labels", self.paragraph_title_block_idxes),
            ("vision_labels", self.vision_block_idxes),
            ("vision_title_labels", self.vision_title_block_idxes),
            ("footer_labels", self.footer_block_idxes),
            ("unordered_labels", self.unordered_block_idxes),
        )
        horizontal_normal_text_block_num = 0
        text_line_height_list = []
        text_line_width_list = []
        for idx, block in enumerate(blocks):
            self.block_map[idx] = block
            block.index = idx
            for label_key, bucket in categories:
                if block.label in BLOCK_LABEL_MAP[label_key]:
                    bucket.append(idx)
                    break
            else:
                # NOTE(review): the line metrics and the horizontal count
                # are taken from normal-text blocks only; this nesting is
                # reconstructed from the variable names -- confirm.
                self.normal_text_block_idxes.append(idx)
                text_line_height_list.append(block.text_line_height)
                text_line_width_list.append(block.text_line_width)
                if block.direction == "horizontal":
                    horizontal_normal_text_block_num += 1

        # Horizontal wins when at least half of the normal-text blocks are
        # horizontal (which includes the case of no normal-text blocks).
        if horizontal_normal_text_block_num >= len(self.normal_text_block_idxes) * 0.5:
            self.direction = "horizontal"
        else:
            self.direction = "vertical"
        self.text_line_width = (
            np.mean(text_line_width_list) if text_line_width_list else 20
        )
        self.text_line_height = (
            np.mean(text_line_height_list) if text_line_height_list else 10
        )

    def init_direction_info(self):
        """Record the bbox indices and centre coordinates for each direction."""
        if self.direction == "horizontal":
            self.direction_start_index = 0
            self.direction_end_index = 2
            self.secondary_direction_start_index = 1
            self.secondary_direction_end_index = 3
            self.secondary_direction = "vertical"
        else:
            self.direction_start_index = 1
            self.direction_end_index = 3
            self.secondary_direction_start_index = 0
            self.secondary_direction_end_index = 2
            self.secondary_direction = "horizontal"

        self.direction_center_coordinate = (
            self.bbox[self.direction_start_index] + self.bbox[self.direction_end_index]
        ) / 2
        self.secondary_direction_center_coordinate = (
            self.bbox[self.secondary_direction_start_index]
            + self.bbox[self.secondary_direction_end_index]
        ) / 2

    def calculate_bbox_metrics(self, image_shape):
        """Compute distance metrics of ``bbox`` relative to the origin.

        Args:
            image_shape: ``(height, width)`` pair; only the width is used.

        Raises:
            TypeError: If ``image_shape`` is ``None``.
        """
        x1, y1, x2, y2 = self.bbox
        if image_shape is None:
            raise TypeError("image_shape must be a (height, width) pair, got None")
        _, image_width = image_shape
        width = x2 - x1
        x_center, y_center = (x1 + x2) / 2, (y1 + y2) / 2
        self.euclidean_distance = math.sqrt(x1**2 + y1**2)
        self.center_euclidean_distance = math.sqrt(x_center**2 + y_center**2)
        self.angle_rad = math.atan2(y_center, x_center)
        # x1 is snapped down to a multiple of one tenth of the image width.
        # The step is clamped to 1 so images narrower than 10 px do not
        # cause a division by zero.
        column_step = max(image_width // 10, 1)
        self.weighted_distance = y2 + width + (x1 // column_step) * column_step * 1.5

    def sort_normal_blocks(self, blocks):
        """Sort ``blocks`` in place according to the region direction.

        Horizontal regions order by row bucket, then column bucket, then
        squared distance of the top-left corner from the origin. Vertical
        regions order by negated column bucket, then row bucket, then
        negated ``x_max**2 + y_min**2``.
        """
        if self.direction == "horizontal":

            def sort_key(x):
                return (
                    x.bbox[1] // self.text_line_height,
                    x.bbox[0] // self.text_line_width,
                    x.bbox[1] ** 2 + x.bbox[0] ** 2,
                )

        else:

            def sort_key(x):
                return (
                    -x.bbox[0] // self.text_line_width,
                    x.bbox[1] // self.text_line_height,
                    -(x.bbox[2] ** 2 + x.bbox[1] ** 2),
                )

        blocks.sort(key=sort_key)

    def sort(self):
        """Return the result of ``xycut_enhanced`` applied to this region."""
        # Imported at call time, as in the original method.
        from .xycut_enhanced import xycut_enhanced

        return xycut_enhanced(self)
|