paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +17 -34
- paddlex/__main__.py +1 -1
- paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee-2B.yaml +14 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee-7B.yaml +14 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
- paddlex/configs/modules/open_vocabulary_detection/YOLO-Worldv2-L.yaml +13 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
- paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
- paddlex/configs/pipelines/OCR.yaml +7 -6
- paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
- paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
- paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
- paddlex/configs/pipelines/anomaly_detection.yaml +1 -1
- paddlex/configs/pipelines/doc_understanding.yaml +9 -0
- paddlex/configs/pipelines/formula_recognition.yaml +2 -2
- paddlex/configs/pipelines/layout_parsing.yaml +3 -2
- paddlex/configs/pipelines/seal_recognition.yaml +1 -0
- paddlex/configs/pipelines/table_recognition.yaml +2 -1
- paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
- paddlex/configs/pipelines/ts_anomaly_detection.yaml +1 -1
- paddlex/configs/pipelines/ts_classification.yaml +1 -1
- paddlex/configs/pipelines/ts_forecast.yaml +1 -1
- paddlex/constants.py +17 -0
- paddlex/engine.py +7 -5
- paddlex/hpip_links.html +23 -11
- paddlex/inference/__init__.py +3 -3
- paddlex/inference/common/__init__.py +1 -1
- paddlex/inference/common/batch_sampler/__init__.py +5 -4
- paddlex/inference/common/batch_sampler/audio_batch_sampler.py +5 -6
- paddlex/inference/common/batch_sampler/base_batch_sampler.py +20 -16
- paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +4 -7
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +87 -0
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +45 -60
- paddlex/inference/common/batch_sampler/ts_batch_sampler.py +9 -10
- paddlex/inference/common/batch_sampler/video_batch_sampler.py +2 -22
- paddlex/inference/common/reader/__init__.py +4 -4
- paddlex/inference/common/reader/audio_reader.py +3 -3
- paddlex/inference/common/reader/det_3d_reader.py +7 -5
- paddlex/inference/common/reader/image_reader.py +16 -12
- paddlex/inference/common/reader/ts_reader.py +3 -2
- paddlex/inference/common/reader/video_reader.py +3 -3
- paddlex/inference/common/result/__init__.py +7 -7
- paddlex/inference/common/result/base_cv_result.py +12 -2
- paddlex/inference/common/result/base_result.py +7 -5
- paddlex/inference/common/result/base_ts_result.py +1 -2
- paddlex/inference/common/result/base_video_result.py +2 -2
- paddlex/inference/common/result/mixin.py +31 -25
- paddlex/inference/models/__init__.py +41 -85
- paddlex/inference/models/anomaly_detection/__init__.py +1 -1
- paddlex/inference/models/anomaly_detection/predictor.py +9 -19
- paddlex/inference/models/anomaly_detection/processors.py +9 -2
- paddlex/inference/models/anomaly_detection/result.py +3 -2
- paddlex/inference/models/base/__init__.py +2 -2
- paddlex/inference/models/base/predictor/__init__.py +1 -2
- paddlex/inference/models/base/predictor/base_predictor.py +278 -39
- paddlex/inference/models/common/__init__.py +6 -15
- paddlex/inference/models/common/static_infer.py +724 -251
- paddlex/inference/models/common/tokenizer/__init__.py +7 -3
- paddlex/inference/models/common/tokenizer/bert_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +609 -0
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +9 -7
- paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +438 -0
- paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +85 -77
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +339 -123
- paddlex/inference/models/common/tokenizer/utils.py +1 -1
- paddlex/inference/models/common/tokenizer/vocab.py +8 -8
- paddlex/inference/models/common/ts/__init__.py +1 -1
- paddlex/inference/models/common/ts/funcs.py +13 -6
- paddlex/inference/models/common/ts/processors.py +14 -5
- paddlex/inference/models/common/vision/__init__.py +3 -3
- paddlex/inference/models/common/vision/funcs.py +17 -12
- paddlex/inference/models/common/vision/processors.py +61 -46
- paddlex/inference/models/common/vlm/__init__.py +13 -0
- paddlex/inference/models/common/vlm/activations.py +189 -0
- paddlex/inference/models/common/vlm/bert_padding.py +127 -0
- paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
- paddlex/inference/models/common/vlm/distributed.py +229 -0
- paddlex/inference/models/common/vlm/flash_attn_utils.py +119 -0
- paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
- paddlex/inference/models/common/vlm/generation/__init__.py +34 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +533 -0
- paddlex/inference/models/common/vlm/generation/logits_process.py +730 -0
- paddlex/inference/models/common/vlm/generation/stopping_criteria.py +106 -0
- paddlex/inference/models/common/vlm/generation/utils.py +2162 -0
- paddlex/inference/models/common/vlm/transformers/__init__.py +16 -0
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +1037 -0
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +408 -0
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +1612 -0
- paddlex/inference/models/common/vlm/transformers/model_utils.py +2014 -0
- paddlex/inference/models/common/vlm/transformers/utils.py +178 -0
- paddlex/inference/models/common/vlm/utils.py +109 -0
- paddlex/inference/models/doc_vlm/__init__.py +15 -0
- paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +17 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +2495 -0
- paddlex/inference/models/doc_vlm/predictor.py +253 -0
- paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +17 -0
- paddlex/inference/models/doc_vlm/processors/common.py +561 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +543 -0
- paddlex/inference/models/doc_vlm/result.py +21 -0
- paddlex/inference/models/face_feature/__init__.py +1 -1
- paddlex/inference/models/face_feature/predictor.py +2 -1
- paddlex/inference/models/formula_recognition/__init__.py +1 -1
- paddlex/inference/models/formula_recognition/predictor.py +18 -28
- paddlex/inference/models/formula_recognition/processors.py +126 -97
- paddlex/inference/models/formula_recognition/result.py +43 -35
- paddlex/inference/models/image_classification/__init__.py +1 -1
- paddlex/inference/models/image_classification/predictor.py +9 -19
- paddlex/inference/models/image_classification/processors.py +4 -2
- paddlex/inference/models/image_classification/result.py +4 -3
- paddlex/inference/models/image_feature/__init__.py +1 -1
- paddlex/inference/models/image_feature/predictor.py +9 -19
- paddlex/inference/models/image_feature/processors.py +7 -5
- paddlex/inference/models/image_feature/result.py +2 -3
- paddlex/inference/models/image_multilabel_classification/__init__.py +1 -1
- paddlex/inference/models/image_multilabel_classification/predictor.py +7 -6
- paddlex/inference/models/image_multilabel_classification/processors.py +6 -2
- paddlex/inference/models/image_multilabel_classification/result.py +4 -3
- paddlex/inference/models/image_unwarping/__init__.py +1 -1
- paddlex/inference/models/image_unwarping/predictor.py +8 -16
- paddlex/inference/models/image_unwarping/processors.py +6 -2
- paddlex/inference/models/image_unwarping/result.py +4 -2
- paddlex/inference/models/instance_segmentation/__init__.py +1 -1
- paddlex/inference/models/instance_segmentation/predictor.py +7 -15
- paddlex/inference/models/instance_segmentation/processors.py +4 -7
- paddlex/inference/models/instance_segmentation/result.py +11 -10
- paddlex/inference/models/keypoint_detection/__init__.py +1 -1
- paddlex/inference/models/keypoint_detection/predictor.py +5 -3
- paddlex/inference/models/keypoint_detection/processors.py +11 -3
- paddlex/inference/models/keypoint_detection/result.py +9 -4
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/predictor.py +15 -26
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/processors.py +26 -14
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/result.py +15 -12
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/visualizer_3d.py +77 -39
- paddlex/inference/models/multilingual_speech_recognition/__init__.py +1 -1
- paddlex/inference/models/multilingual_speech_recognition/predictor.py +11 -15
- paddlex/inference/models/multilingual_speech_recognition/processors.py +45 -53
- paddlex/inference/models/multilingual_speech_recognition/result.py +1 -1
- paddlex/inference/models/object_detection/__init__.py +1 -1
- paddlex/inference/models/object_detection/predictor.py +8 -12
- paddlex/inference/models/object_detection/processors.py +63 -33
- paddlex/inference/models/object_detection/result.py +5 -4
- paddlex/inference/models/object_detection/utils.py +3 -1
- paddlex/inference/models/open_vocabulary_detection/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_detection/predictor.py +31 -14
- paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +3 -2
- paddlex/inference/models/open_vocabulary_detection/processors/common.py +114 -0
- paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +19 -8
- paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py +209 -0
- paddlex/inference/models/open_vocabulary_segmentation/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/predictor.py +6 -13
- paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +12 -12
- paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +11 -9
- paddlex/inference/models/semantic_segmentation/__init__.py +1 -1
- paddlex/inference/models/semantic_segmentation/predictor.py +9 -18
- paddlex/inference/models/semantic_segmentation/processors.py +11 -8
- paddlex/inference/models/semantic_segmentation/result.py +4 -3
- paddlex/inference/models/table_structure_recognition/__init__.py +1 -1
- paddlex/inference/models/table_structure_recognition/predictor.py +8 -18
- paddlex/inference/models/table_structure_recognition/processors.py +23 -29
- paddlex/inference/models/table_structure_recognition/result.py +8 -15
- paddlex/inference/models/text_detection/__init__.py +1 -1
- paddlex/inference/models/text_detection/predictor.py +24 -24
- paddlex/inference/models/text_detection/processors.py +116 -44
- paddlex/inference/models/text_detection/result.py +8 -13
- paddlex/inference/models/text_recognition/__init__.py +1 -1
- paddlex/inference/models/text_recognition/predictor.py +11 -19
- paddlex/inference/models/text_recognition/processors.py +27 -13
- paddlex/inference/models/text_recognition/result.py +3 -2
- paddlex/inference/models/ts_anomaly_detection/__init__.py +1 -1
- paddlex/inference/models/ts_anomaly_detection/predictor.py +12 -17
- paddlex/inference/models/ts_anomaly_detection/processors.py +6 -2
- paddlex/inference/models/ts_anomaly_detection/result.py +21 -10
- paddlex/inference/models/ts_classification/__init__.py +1 -1
- paddlex/inference/models/ts_classification/predictor.py +14 -27
- paddlex/inference/models/ts_classification/processors.py +7 -2
- paddlex/inference/models/ts_classification/result.py +21 -12
- paddlex/inference/models/ts_forecasting/__init__.py +1 -1
- paddlex/inference/models/ts_forecasting/predictor.py +13 -18
- paddlex/inference/models/ts_forecasting/processors.py +12 -3
- paddlex/inference/models/ts_forecasting/result.py +24 -11
- paddlex/inference/models/video_classification/__init__.py +1 -1
- paddlex/inference/models/video_classification/predictor.py +9 -15
- paddlex/inference/models/video_classification/processors.py +24 -24
- paddlex/inference/models/video_classification/result.py +7 -3
- paddlex/inference/models/video_detection/__init__.py +1 -1
- paddlex/inference/models/video_detection/predictor.py +8 -15
- paddlex/inference/models/video_detection/processors.py +24 -11
- paddlex/inference/models/video_detection/result.py +10 -5
- paddlex/inference/pipelines/__init__.py +48 -37
- paddlex/inference/pipelines/_parallel.py +172 -0
- paddlex/inference/pipelines/anomaly_detection/__init__.py +1 -1
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +29 -9
- paddlex/inference/pipelines/attribute_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +24 -9
- paddlex/inference/pipelines/attribute_recognition/result.py +10 -8
- paddlex/inference/pipelines/base.py +43 -13
- paddlex/inference/pipelines/components/__init__.py +14 -8
- paddlex/inference/pipelines/components/chat_server/__init__.py +1 -1
- paddlex/inference/pipelines/components/chat_server/base.py +2 -2
- paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +8 -8
- paddlex/inference/pipelines/components/common/__init__.py +5 -4
- paddlex/inference/pipelines/components/common/base_operator.py +2 -1
- paddlex/inference/pipelines/components/common/base_result.py +3 -2
- paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +1 -2
- paddlex/inference/pipelines/components/common/crop_image_regions.py +11 -5
- paddlex/inference/pipelines/components/common/seal_det_warp.py +44 -13
- paddlex/inference/pipelines/components/common/sort_boxes.py +4 -2
- paddlex/inference/pipelines/components/common/warp_image.py +50 -0
- paddlex/inference/pipelines/components/faisser.py +10 -5
- paddlex/inference/pipelines/components/prompt_engineering/__init__.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/base.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +2 -1
- paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +2 -2
- paddlex/inference/pipelines/components/retriever/__init__.py +2 -2
- paddlex/inference/pipelines/components/retriever/base.py +18 -16
- paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +2 -2
- paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +87 -84
- paddlex/inference/pipelines/components/utils/__init__.py +1 -1
- paddlex/inference/pipelines/components/utils/mixin.py +7 -7
- paddlex/inference/pipelines/doc_preprocessor/__init__.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +70 -51
- paddlex/inference/pipelines/doc_preprocessor/result.py +5 -10
- paddlex/inference/pipelines/doc_understanding/__init__.py +15 -0
- paddlex/inference/pipelines/doc_understanding/pipeline.py +71 -0
- paddlex/inference/pipelines/face_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/face_recognition/pipeline.py +3 -1
- paddlex/inference/pipelines/face_recognition/result.py +3 -2
- paddlex/inference/pipelines/formula_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/formula_recognition/pipeline.py +137 -93
- paddlex/inference/pipelines/formula_recognition/result.py +20 -29
- paddlex/inference/pipelines/image_classification/__init__.py +1 -1
- paddlex/inference/pipelines/image_classification/pipeline.py +30 -11
- paddlex/inference/pipelines/image_multilabel_classification/__init__.py +1 -1
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +31 -12
- paddlex/inference/pipelines/instance_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +30 -9
- paddlex/inference/pipelines/keypoint_detection/__init__.py +1 -1
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/layout_parsing/__init__.py +1 -1
- paddlex/inference/pipelines/layout_parsing/pipeline.py +54 -56
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +904 -261
- paddlex/inference/pipelines/layout_parsing/result.py +9 -21
- paddlex/inference/pipelines/layout_parsing/result_v2.py +525 -250
- paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +570 -2004
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
- paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
- paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/pipeline.py +17 -10
- paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +17 -6
- paddlex/inference/pipelines/object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/object_detection/pipeline.py +29 -9
- paddlex/inference/pipelines/ocr/__init__.py +1 -1
- paddlex/inference/pipelines/ocr/pipeline.py +151 -77
- paddlex/inference/pipelines/ocr/result.py +31 -24
- paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +1 -1
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +17 -6
- paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +17 -6
- paddlex/inference/pipelines/pp_chatocr/__init__.py +1 -1
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +14 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +22 -14
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +34 -16
- paddlex/inference/pipelines/pp_shitu_v2/__init__.py +1 -1
- paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +12 -8
- paddlex/inference/pipelines/pp_shitu_v2/result.py +4 -4
- paddlex/inference/pipelines/rotated_object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/seal_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/seal_recognition/pipeline.py +127 -63
- paddlex/inference/pipelines/seal_recognition/result.py +4 -2
- paddlex/inference/pipelines/semantic_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +30 -9
- paddlex/inference/pipelines/small_object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/small_object_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/table_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/table_recognition/pipeline.py +61 -37
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +668 -65
- paddlex/inference/pipelines/table_recognition/result.py +12 -10
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +12 -8
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +55 -37
- paddlex/inference/pipelines/table_recognition/utils.py +1 -1
- paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +1 -1
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ts_classification/__init__.py +1 -1
- paddlex/inference/pipelines/ts_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/ts_forecasting/__init__.py +1 -1
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +16 -6
- paddlex/inference/pipelines/video_classification/__init__.py +1 -1
- paddlex/inference/pipelines/video_classification/pipeline.py +17 -6
- paddlex/inference/pipelines/video_detection/__init__.py +1 -1
- paddlex/inference/pipelines/video_detection/pipeline.py +20 -7
- paddlex/inference/serving/__init__.py +5 -1
- paddlex/inference/serving/basic_serving/__init__.py +1 -1
- paddlex/inference/serving/basic_serving/_app.py +31 -19
- paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +7 -4
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +12 -4
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +7 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py +153 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +13 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +10 -8
- paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +13 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +17 -14
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +16 -9
- paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +11 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +14 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_server.py +9 -4
- paddlex/inference/serving/infra/__init__.py +1 -1
- paddlex/inference/serving/infra/config.py +1 -1
- paddlex/inference/serving/infra/models.py +13 -6
- paddlex/inference/serving/infra/storage.py +9 -4
- paddlex/inference/serving/infra/utils.py +54 -28
- paddlex/inference/serving/schemas/__init__.py +1 -1
- paddlex/inference/serving/schemas/anomaly_detection.py +1 -1
- paddlex/inference/serving/schemas/doc_preprocessor.py +1 -1
- paddlex/inference/serving/schemas/doc_understanding.py +78 -0
- paddlex/inference/serving/schemas/face_recognition.py +1 -1
- paddlex/inference/serving/schemas/formula_recognition.py +2 -2
- paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -1
- paddlex/inference/serving/schemas/image_classification.py +1 -1
- paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -1
- paddlex/inference/serving/schemas/instance_segmentation.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +2 -3
- paddlex/inference/serving/schemas/m_3d_bev_detection.py +1 -1
- paddlex/inference/serving/schemas/multilingual_speech_recognition.py +1 -1
- paddlex/inference/serving/schemas/object_detection.py +1 -1
- paddlex/inference/serving/schemas/ocr.py +1 -1
- paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -1
- paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -1
- paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -1
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +2 -3
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +3 -3
- paddlex/inference/serving/schemas/pp_shituv2.py +1 -1
- paddlex/inference/serving/schemas/pp_structurev3.py +11 -7
- paddlex/inference/serving/schemas/rotated_object_detection.py +1 -1
- paddlex/inference/serving/schemas/seal_recognition.py +2 -2
- paddlex/inference/serving/schemas/semantic_segmentation.py +1 -1
- paddlex/inference/serving/schemas/shared/__init__.py +1 -1
- paddlex/inference/serving/schemas/shared/classification.py +1 -1
- paddlex/inference/serving/schemas/shared/image_segmentation.py +1 -1
- paddlex/inference/serving/schemas/shared/object_detection.py +1 -1
- paddlex/inference/serving/schemas/shared/ocr.py +1 -1
- paddlex/inference/serving/schemas/small_object_detection.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +3 -7
- paddlex/inference/serving/schemas/table_recognition_v2.py +6 -7
- paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -1
- paddlex/inference/serving/schemas/ts_classification.py +1 -1
- paddlex/inference/serving/schemas/ts_forecast.py +1 -1
- paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -1
- paddlex/inference/serving/schemas/video_classification.py +1 -1
- paddlex/inference/serving/schemas/video_detection.py +1 -1
- paddlex/inference/utils/__init__.py +1 -1
- paddlex/inference/utils/benchmark.py +332 -179
- paddlex/inference/utils/color_map.py +1 -1
- paddlex/inference/utils/get_pipeline_path.py +1 -1
- paddlex/inference/utils/hpi.py +258 -0
- paddlex/inference/utils/hpi_model_info_collection.json +2331 -0
- paddlex/inference/utils/io/__init__.py +11 -11
- paddlex/inference/utils/io/readers.py +31 -27
- paddlex/inference/utils/io/style.py +21 -14
- paddlex/inference/utils/io/tablepyxl.py +13 -5
- paddlex/inference/utils/io/writers.py +9 -10
- paddlex/inference/utils/mkldnn_blocklist.py +25 -0
- paddlex/inference/utils/model_paths.py +48 -0
- paddlex/inference/utils/{new_ir_blacklist.py → new_ir_blocklist.py} +1 -2
- paddlex/inference/utils/official_models.py +278 -262
- paddlex/inference/utils/pp_option.py +184 -92
- paddlex/inference/utils/trt_blocklist.py +43 -0
- paddlex/inference/utils/trt_config.py +420 -0
- paddlex/model.py +30 -12
- paddlex/modules/__init__.py +57 -80
- paddlex/modules/anomaly_detection/__init__.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/__init__.py +2 -3
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +6 -3
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +8 -4
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +7 -4
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +7 -2
- paddlex/modules/anomaly_detection/evaluator.py +3 -3
- paddlex/modules/anomaly_detection/exportor.py +1 -1
- paddlex/modules/anomaly_detection/model_list.py +1 -1
- paddlex/modules/anomaly_detection/trainer.py +3 -4
- paddlex/modules/base/__init__.py +5 -5
- paddlex/modules/base/build_model.py +1 -2
- paddlex/modules/base/dataset_checker/__init__.py +2 -2
- paddlex/modules/base/dataset_checker/dataset_checker.py +4 -4
- paddlex/modules/base/dataset_checker/utils.py +1 -3
- paddlex/modules/base/evaluator.py +13 -13
- paddlex/modules/base/exportor.py +12 -13
- paddlex/modules/base/trainer.py +21 -11
- paddlex/modules/base/utils/__init__.py +13 -0
- paddlex/modules/base/utils/cinn_setting.py +89 -0
- paddlex/modules/base/utils/coco_eval.py +94 -0
- paddlex/modules/base/utils/topk_eval.py +118 -0
- paddlex/modules/doc_vlm/__init__.py +18 -0
- paddlex/modules/doc_vlm/dataset_checker.py +29 -0
- paddlex/modules/doc_vlm/evaluator.py +29 -0
- paddlex/modules/doc_vlm/exportor.py +29 -0
- paddlex/modules/doc_vlm/model_list.py +16 -0
- paddlex/modules/doc_vlm/trainer.py +41 -0
- paddlex/modules/face_recognition/__init__.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/__init__.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +1 -1
- paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +3 -5
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/face_recognition/evaluator.py +3 -3
- paddlex/modules/face_recognition/exportor.py +1 -1
- paddlex/modules/face_recognition/model_list.py +1 -1
- paddlex/modules/face_recognition/trainer.py +1 -1
- paddlex/modules/formula_recognition/__init__.py +2 -2
- paddlex/modules/formula_recognition/dataset_checker/__init__.py +3 -3
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +2 -6
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
- paddlex/modules/formula_recognition/evaluator.py +6 -3
- paddlex/modules/formula_recognition/exportor.py +1 -1
- paddlex/modules/formula_recognition/model_list.py +4 -1
- paddlex/modules/formula_recognition/trainer.py +5 -3
- paddlex/modules/general_recognition/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +7 -9
- paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +4 -5
- paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +6 -5
- paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/general_recognition/evaluator.py +2 -2
- paddlex/modules/general_recognition/exportor.py +1 -1
- paddlex/modules/general_recognition/model_list.py +1 -1
- paddlex/modules/general_recognition/trainer.py +1 -1
- paddlex/modules/image_classification/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
- paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/image_classification/evaluator.py +3 -3
- paddlex/modules/image_classification/exportor.py +1 -1
- paddlex/modules/image_classification/model_list.py +2 -1
- paddlex/modules/image_classification/trainer.py +3 -3
- paddlex/modules/image_unwarping/__init__.py +1 -1
- paddlex/modules/image_unwarping/model_list.py +1 -1
- paddlex/modules/instance_segmentation/__init__.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/__init__.py +2 -3
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +9 -5
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +8 -5
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +8 -8
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +7 -4
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +10 -8
- paddlex/modules/instance_segmentation/evaluator.py +2 -2
- paddlex/modules/instance_segmentation/exportor.py +1 -1
- paddlex/modules/instance_segmentation/model_list.py +1 -1
- paddlex/modules/instance_segmentation/trainer.py +1 -1
- paddlex/modules/keypoint_detection/__init__.py +2 -2
- paddlex/modules/keypoint_detection/dataset_checker/__init__.py +2 -2
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +1 -1
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +8 -3
- paddlex/modules/keypoint_detection/evaluator.py +2 -2
- paddlex/modules/keypoint_detection/exportor.py +1 -1
- paddlex/modules/keypoint_detection/model_list.py +1 -1
- paddlex/modules/keypoint_detection/trainer.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/__init__.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/__init__.py +3 -3
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/analyse_dataset.py +8 -8
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/check_dataset.py +1 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/evaluator.py +3 -3
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/exportor.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/model_list.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/trainer.py +5 -7
- paddlex/modules/multilabel_classification/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +10 -7
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +1 -5
- paddlex/modules/multilabel_classification/evaluator.py +3 -3
- paddlex/modules/multilabel_classification/exportor.py +1 -1
- paddlex/modules/multilabel_classification/model_list.py +1 -1
- paddlex/modules/multilabel_classification/trainer.py +3 -3
- paddlex/modules/multilingual_speech_recognition/__init__.py +2 -2
- paddlex/modules/multilingual_speech_recognition/dataset_checker.py +3 -3
- paddlex/modules/multilingual_speech_recognition/evaluator.py +3 -3
- paddlex/modules/multilingual_speech_recognition/exportor.py +3 -3
- paddlex/modules/multilingual_speech_recognition/model_list.py +1 -1
- paddlex/modules/multilingual_speech_recognition/trainer.py +7 -5
- paddlex/modules/object_detection/__init__.py +2 -2
- paddlex/modules/object_detection/dataset_checker/__init__.py +2 -11
- paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +10 -8
- paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +17 -12
- paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +8 -4
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +9 -8
- paddlex/modules/object_detection/evaluator.py +11 -6
- paddlex/modules/object_detection/exportor.py +1 -1
- paddlex/modules/object_detection/model_list.py +3 -1
- paddlex/modules/object_detection/trainer.py +4 -5
- paddlex/modules/open_vocabulary_detection/__init__.py +2 -2
- paddlex/modules/open_vocabulary_detection/dataset_checker.py +3 -3
- paddlex/modules/open_vocabulary_detection/evaluator.py +3 -3
- paddlex/modules/open_vocabulary_detection/exportor.py +3 -3
- paddlex/modules/open_vocabulary_detection/model_list.py +2 -4
- paddlex/modules/open_vocabulary_detection/trainer.py +7 -5
- paddlex/modules/open_vocabulary_segmentation/__init__.py +2 -2
- paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/evaluator.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/exportor.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/model_list.py +1 -1
- paddlex/modules/open_vocabulary_segmentation/trainer.py +7 -5
- paddlex/modules/semantic_segmentation/__init__.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +2 -3
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +6 -3
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +7 -4
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +6 -2
- paddlex/modules/semantic_segmentation/evaluator.py +3 -3
- paddlex/modules/semantic_segmentation/exportor.py +1 -1
- paddlex/modules/semantic_segmentation/model_list.py +1 -1
- paddlex/modules/semantic_segmentation/trainer.py +3 -4
- paddlex/modules/table_recognition/__init__.py +2 -2
- paddlex/modules/table_recognition/dataset_checker/__init__.py +5 -5
- paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +3 -2
- paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +8 -7
- paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +2 -1
- paddlex/modules/table_recognition/evaluator.py +3 -3
- paddlex/modules/table_recognition/exportor.py +1 -1
- paddlex/modules/table_recognition/model_list.py +1 -1
- paddlex/modules/table_recognition/trainer.py +2 -5
- paddlex/modules/text_detection/__init__.py +2 -2
- paddlex/modules/text_detection/dataset_checker/__init__.py +4 -6
- paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +12 -9
- paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +3 -3
- paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +3 -3
- paddlex/modules/text_detection/evaluator.py +3 -3
- paddlex/modules/text_detection/exportor.py +1 -1
- paddlex/modules/text_detection/model_list.py +3 -1
- paddlex/modules/text_detection/trainer.py +2 -5
- paddlex/modules/text_recognition/__init__.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/__init__.py +4 -5
- paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
- paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +2 -5
- paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
- paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
- paddlex/modules/text_recognition/evaluator.py +3 -3
- paddlex/modules/text_recognition/exportor.py +1 -1
- paddlex/modules/text_recognition/model_list.py +3 -1
- paddlex/modules/text_recognition/trainer.py +2 -3
- paddlex/modules/ts_anomaly_detection/__init__.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +1 -9
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_anomaly_detection/evaluator.py +3 -3
- paddlex/modules/ts_anomaly_detection/exportor.py +2 -3
- paddlex/modules/ts_anomaly_detection/model_list.py +1 -1
- paddlex/modules/ts_anomaly_detection/trainer.py +8 -8
- paddlex/modules/ts_classification/__init__.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -5
- paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +5 -5
- paddlex/modules/ts_classification/evaluator.py +3 -3
- paddlex/modules/ts_classification/exportor.py +2 -3
- paddlex/modules/ts_classification/model_list.py +1 -1
- paddlex/modules/ts_classification/trainer.py +7 -7
- paddlex/modules/ts_forecast/__init__.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +1 -9
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_forecast/evaluator.py +3 -3
- paddlex/modules/ts_forecast/exportor.py +2 -3
- paddlex/modules/ts_forecast/model_list.py +1 -1
- paddlex/modules/ts_forecast/trainer.py +7 -7
- paddlex/modules/video_classification/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +9 -9
- paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +2 -3
- paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/video_classification/evaluator.py +3 -3
- paddlex/modules/video_classification/exportor.py +1 -1
- paddlex/modules/video_classification/model_list.py +1 -1
- paddlex/modules/video_classification/trainer.py +3 -3
- paddlex/modules/video_detection/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +3 -5
- paddlex/modules/video_detection/evaluator.py +3 -3
- paddlex/modules/video_detection/exportor.py +1 -1
- paddlex/modules/video_detection/model_list.py +1 -1
- paddlex/modules/video_detection/trainer.py +3 -3
- paddlex/ops/__init__.py +7 -4
- paddlex/ops/iou3d_nms/iou3d_cpu.cpp +8 -6
- paddlex/ops/iou3d_nms/iou3d_cpu.h +3 -2
- paddlex/ops/iou3d_nms/iou3d_nms.cpp +8 -6
- paddlex/ops/iou3d_nms/iou3d_nms.h +6 -4
- paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +24 -18
- paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +9 -7
- paddlex/ops/setup.py +3 -3
- paddlex/ops/voxel/voxelize_op.cc +22 -19
- paddlex/ops/voxel/voxelize_op.cu +25 -25
- paddlex/paddlex_cli.py +104 -87
- paddlex/repo_apis/Paddle3D_api/__init__.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +6 -6
- paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +2 -2
- paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +1 -1
- paddlex/repo_apis/Paddle3D_api/pp3d_config.py +3 -2
- paddlex/repo_apis/PaddleClas_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/__init__.py +3 -3
- paddlex/repo_apis/PaddleClas_api/cls/config.py +5 -4
- paddlex/repo_apis/PaddleClas_api/cls/model.py +4 -4
- paddlex/repo_apis/PaddleClas_api/cls/register.py +12 -3
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +2 -3
- paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +1 -4
- paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +1 -6
- paddlex/repo_apis/PaddleDetection_api/__init__.py +2 -2
- paddlex/repo_apis/PaddleDetection_api/config_helper.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +2 -2
- paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +4 -4
- paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +5 -4
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +6 -7
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +26 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +32 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +2 -3
- paddlex/repo_apis/PaddleNLP_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/__init__.py +4 -3
- paddlex/repo_apis/PaddleOCR_api/config_utils.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +7 -6
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +9 -13
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +29 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +4 -4
- paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/config.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +4 -4
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +20 -3
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +7 -6
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +9 -13
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +20 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +2 -3
- paddlex/repo_apis/PaddleSeg_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +2 -2
- paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/config.py +3 -6
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +6 -6
- paddlex/repo_apis/PaddleSeg_api/seg/register.py +2 -3
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +2 -3
- paddlex/repo_apis/PaddleTS_api/__init__.py +4 -3
- paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +5 -6
- paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_base/config.py +2 -4
- paddlex/repo_apis/PaddleTS_api/ts_base/model.py +4 -4
- paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +4 -5
- paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +6 -7
- paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/config_utils.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +5 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +5 -4
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +5 -5
- paddlex/repo_apis/PaddleVideo_api/video_det/register.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +2 -3
- paddlex/repo_apis/__init__.py +1 -1
- paddlex/repo_apis/base/__init__.py +4 -5
- paddlex/repo_apis/base/config.py +3 -4
- paddlex/repo_apis/base/model.py +11 -19
- paddlex/repo_apis/base/register.py +1 -1
- paddlex/repo_apis/base/runner.py +11 -12
- paddlex/repo_apis/base/utils/__init__.py +1 -1
- paddlex/repo_apis/base/utils/arg.py +1 -1
- paddlex/repo_apis/base/utils/subprocess.py +1 -1
- paddlex/repo_manager/__init__.py +2 -9
- paddlex/repo_manager/core.py +12 -30
- paddlex/repo_manager/meta.py +41 -31
- paddlex/repo_manager/repo.py +171 -161
- paddlex/repo_manager/utils.py +13 -224
- paddlex/utils/__init__.py +1 -1
- paddlex/utils/cache.py +8 -10
- paddlex/utils/config.py +6 -5
- paddlex/utils/{custom_device_whitelist.py → custom_device_list.py} +53 -199
- paddlex/utils/deps.py +249 -0
- paddlex/utils/device.py +87 -36
- paddlex/utils/download.py +4 -4
- paddlex/utils/env.py +37 -7
- paddlex/utils/errors/__init__.py +1 -1
- paddlex/utils/errors/dataset_checker.py +1 -1
- paddlex/utils/errors/others.py +2 -16
- paddlex/utils/file_interface.py +4 -5
- paddlex/utils/flags.py +17 -12
- paddlex/utils/fonts/__init__.py +36 -5
- paddlex/utils/func_register.py +1 -1
- paddlex/utils/install.py +87 -0
- paddlex/utils/interactive_get_pipeline.py +3 -3
- paddlex/utils/lazy_loader.py +3 -3
- paddlex/utils/logging.py +10 -1
- paddlex/utils/misc.py +6 -6
- paddlex/utils/pipeline_arguments.py +15 -7
- paddlex/utils/result_saver.py +4 -5
- paddlex/utils/subclass_register.py +2 -4
- paddlex/version.py +2 -1
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/METADATA +237 -102
- paddlex-3.0.1.dist-info/RECORD +1095 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
- paddlex/inference/models/base/predictor/basic_predictor.py +0 -139
- paddlex/paddle2onnx_requirements.txt +0 -1
- paddlex/repo_manager/requirements.txt +0 -21
- paddlex/serving_requirements.txt +0 -9
- paddlex-3.0.0rc0.dist-info/RECORD +0 -1015
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info/licenses}/LICENSE +0 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -13,33 +13,52 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
from __future__ import annotations
|
15
15
|
|
16
|
-
from typing import Optional, Union, Tuple, Iterator
|
17
|
-
import numpy as np
|
18
|
-
import re
|
19
16
|
import copy
|
17
|
+
import re
|
18
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
19
|
+
|
20
|
+
import numpy as np
|
21
|
+
from PIL import Image
|
20
22
|
|
21
23
|
from ....utils import logging
|
24
|
+
from ....utils.deps import pipeline_requires_extra
|
22
25
|
from ...common.batch_sampler import ImageBatchSampler
|
23
26
|
from ...common.reader import ReadImage
|
24
27
|
from ...models.object_detection.result import DetResult
|
28
|
+
from ...utils.hpi import HPIConfig
|
25
29
|
from ...utils.pp_option import PaddlePredictorOption
|
30
|
+
from .._parallel import AutoParallelImageSimpleInferencePipeline
|
26
31
|
from ..base import BasePipeline
|
27
32
|
from ..ocr.result import OCRResult
|
28
|
-
from .result_v2 import LayoutParsingResultV2
|
29
|
-
from .
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
+
from .result_v2 import LayoutParsingBlock, LayoutParsingRegion, LayoutParsingResultV2
|
34
|
+
from .setting import BLOCK_LABEL_MAP, BLOCK_SETTINGS, LINE_SETTINGS, REGION_SETTINGS
|
35
|
+
from .utils import (
|
36
|
+
caculate_bbox_area,
|
37
|
+
calculate_minimum_enclosing_bbox,
|
38
|
+
calculate_overlap_ratio,
|
39
|
+
convert_formula_res_to_ocr_format,
|
40
|
+
format_line,
|
41
|
+
gather_imgs,
|
42
|
+
get_bbox_intersection,
|
43
|
+
get_sub_regions_ocr_res,
|
44
|
+
group_boxes_into_lines,
|
45
|
+
remove_overlap_blocks,
|
46
|
+
shrink_supplement_region_bbox,
|
47
|
+
split_boxes_by_projection,
|
48
|
+
update_region_box,
|
49
|
+
)
|
50
|
+
|
51
|
+
|
52
|
+
class _LayoutParsingPipelineV2(BasePipeline):
|
33
53
|
"""Layout Parsing Pipeline V2"""
|
34
54
|
|
35
|
-
entities = ["PP-StructureV3"]
|
36
|
-
|
37
55
|
def __init__(
|
38
56
|
self,
|
39
57
|
config: dict,
|
40
58
|
device: str = None,
|
41
59
|
pp_option: PaddlePredictorOption = None,
|
42
60
|
use_hpip: bool = False,
|
61
|
+
hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
|
43
62
|
) -> None:
|
44
63
|
"""Initializes the layout parsing pipeline.
|
45
64
|
|
@@ -47,19 +66,23 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
47
66
|
config (Dict): Configuration dictionary containing various settings.
|
48
67
|
device (str, optional): Device to run the predictions on. Defaults to None.
|
49
68
|
pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
|
50
|
-
use_hpip (bool, optional): Whether to use high-performance
|
69
|
+
use_hpip (bool, optional): Whether to use the high-performance
|
70
|
+
inference plugin (HPIP) by default. Defaults to False.
|
71
|
+
hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
|
72
|
+
The default high-performance inference configuration dictionary.
|
73
|
+
Defaults to None.
|
51
74
|
"""
|
52
75
|
|
53
76
|
super().__init__(
|
54
77
|
device=device,
|
55
78
|
pp_option=pp_option,
|
56
79
|
use_hpip=use_hpip,
|
80
|
+
hpi_config=hpi_config,
|
57
81
|
)
|
58
82
|
|
59
83
|
self.inintial_predictor(config)
|
60
84
|
|
61
|
-
self.batch_sampler = ImageBatchSampler(batch_size=1)
|
62
|
-
|
85
|
+
self.batch_sampler = ImageBatchSampler(batch_size=config.get("batch_size", 1))
|
63
86
|
self.img_reader = ReadImage(format="BGR")
|
64
87
|
|
65
88
|
def inintial_predictor(self, config: dict) -> None:
|
@@ -73,13 +96,20 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
73
96
|
"""
|
74
97
|
|
75
98
|
self.use_doc_preprocessor = config.get("use_doc_preprocessor", True)
|
76
|
-
self.use_general_ocr = config.get("use_general_ocr", True)
|
77
99
|
self.use_table_recognition = config.get("use_table_recognition", True)
|
78
100
|
self.use_seal_recognition = config.get("use_seal_recognition", True)
|
101
|
+
self.use_region_detection = config.get(
|
102
|
+
"use_region_detection",
|
103
|
+
True,
|
104
|
+
)
|
79
105
|
self.use_formula_recognition = config.get(
|
80
106
|
"use_formula_recognition",
|
81
107
|
True,
|
82
108
|
)
|
109
|
+
self.use_chart_recognition = config.get(
|
110
|
+
"use_chart_recognition",
|
111
|
+
False,
|
112
|
+
)
|
83
113
|
|
84
114
|
if self.use_doc_preprocessor:
|
85
115
|
doc_preprocessor_config = config.get("SubPipelines", {}).get(
|
@@ -91,6 +121,16 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
91
121
|
self.doc_preprocessor_pipeline = self.create_pipeline(
|
92
122
|
doc_preprocessor_config,
|
93
123
|
)
|
124
|
+
if self.use_region_detection:
|
125
|
+
region_detection_config = config.get("SubModules", {}).get(
|
126
|
+
"RegionDetection",
|
127
|
+
{
|
128
|
+
"model_config_error": "config error for block_region_detection_model!"
|
129
|
+
},
|
130
|
+
)
|
131
|
+
self.region_detection_model = self.create_model(
|
132
|
+
region_detection_config,
|
133
|
+
)
|
94
134
|
|
95
135
|
layout_det_config = config.get("SubModules", {}).get(
|
96
136
|
"LayoutDetection",
|
@@ -113,14 +153,13 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
113
153
|
layout_kwargs["layout_merge_bboxes_mode"] = layout_merge_bboxes_mode
|
114
154
|
self.layout_det_model = self.create_model(layout_det_config, **layout_kwargs)
|
115
155
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
)
|
156
|
+
general_ocr_config = config.get("SubPipelines", {}).get(
|
157
|
+
"GeneralOCR",
|
158
|
+
{"pipeline_config_error": "config error for general_ocr_pipeline!"},
|
159
|
+
)
|
160
|
+
self.general_ocr_pipeline = self.create_pipeline(
|
161
|
+
general_ocr_config,
|
162
|
+
)
|
124
163
|
|
125
164
|
if self.use_seal_recognition:
|
126
165
|
seal_recognition_config = config.get("SubPipelines", {}).get(
|
@@ -155,6 +194,17 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
155
194
|
formula_recognition_config,
|
156
195
|
)
|
157
196
|
|
197
|
+
if self.use_chart_recognition:
|
198
|
+
chart_recognition_config = config.get("SubModules", {}).get(
|
199
|
+
"ChartRecognition",
|
200
|
+
{
|
201
|
+
"model_config_error": "config error for block_region_detection_model!"
|
202
|
+
},
|
203
|
+
)
|
204
|
+
self.chart_recognition_model = self.create_model(
|
205
|
+
chart_recognition_config,
|
206
|
+
)
|
207
|
+
|
158
208
|
return
|
159
209
|
|
160
210
|
def get_text_paragraphs_ocr_res(
|
@@ -199,12 +249,6 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
199
249
|
)
|
200
250
|
return False
|
201
251
|
|
202
|
-
if input_params["use_general_ocr"] and not self.use_general_ocr:
|
203
|
-
logging.error(
|
204
|
-
"Set use_general_ocr, but the models for general OCR are not initialized.",
|
205
|
-
)
|
206
|
-
return False
|
207
|
-
|
208
252
|
if input_params["use_seal_recognition"] and not self.use_seal_recognition:
|
209
253
|
logging.error(
|
210
254
|
"Set use_seal_recognition, but the models for seal recognition are not initialized.",
|
@@ -219,159 +263,643 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
219
263
|
|
220
264
|
return True
|
221
265
|
|
222
|
-
def
|
266
|
+
def standardized_data(
|
223
267
|
self,
|
224
268
|
image: list,
|
269
|
+
region_det_res: DetResult,
|
225
270
|
layout_det_res: DetResult,
|
226
271
|
overall_ocr_res: OCRResult,
|
227
|
-
table_res_list: list,
|
228
|
-
seal_res_list: list,
|
229
272
|
formula_res_list: list,
|
230
|
-
|
231
|
-
|
232
|
-
text_det_limit_type: Optional[str] = None,
|
233
|
-
text_det_thresh: Optional[float] = None,
|
234
|
-
text_det_box_thresh: Optional[float] = None,
|
235
|
-
text_det_unclip_ratio: Optional[float] = None,
|
236
|
-
text_rec_score_thresh: Optional[float] = None,
|
273
|
+
text_rec_model: Any,
|
274
|
+
text_rec_score_thresh: Union[float, None] = None,
|
237
275
|
) -> list:
|
238
276
|
"""
|
239
277
|
Retrieves the layout parsing result based on the layout detection result, OCR result, and other recognition results.
|
240
278
|
Args:
|
241
279
|
image (list): The input image.
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
280
|
+
overall_ocr_res (OCRResult): An object containing the overall OCR results, including detected text boxes and recognized text. The structure is expected to have:
|
281
|
+
- "input_img": The image on which OCR was performed.
|
282
|
+
- "dt_boxes": A list of detected text box coordinates.
|
283
|
+
- "rec_texts": A list of recognized text corresponding to the detected boxes.
|
284
|
+
|
285
|
+
layout_det_res (DetResult): An object containing the layout detection results, including detected layout boxes and their labels. The structure is expected to have:
|
286
|
+
- "boxes": A list of dictionaries with keys "coordinate" for box coordinates and "block_label" for the type of content.
|
287
|
+
|
288
|
+
table_res_list (list): A list of table detection results, where each item is a dictionary containing:
|
289
|
+
- "block_bbox": The bounding box of the table layout.
|
290
|
+
- "pred_html": The predicted HTML representation of the table.
|
291
|
+
|
246
292
|
formula_res_list (list): A list of formula recognition results.
|
247
|
-
|
248
|
-
text_det_limit_type (Optional[str], optional): The type of limit for the text detection region. Defaults to None.
|
249
|
-
text_det_thresh (Optional[float], optional): The confidence threshold for text detection. Defaults to None.
|
250
|
-
text_det_box_thresh (Optional[float], optional): The confidence threshold for text detection bounding boxes. Defaults to None
|
251
|
-
text_det_unclip_ratio (Optional[float], optional): The unclip ratio for text detection. Defaults to None.
|
293
|
+
text_rec_model (Any): The text recognition model.
|
252
294
|
text_rec_score_thresh (Optional[float], optional): The score threshold for text recognition. Defaults to None.
|
253
295
|
Returns:
|
254
296
|
list: A list of dictionaries representing the layout parsing result.
|
255
297
|
"""
|
298
|
+
|
256
299
|
matched_ocr_dict = {}
|
257
|
-
|
300
|
+
region_to_block_map = {}
|
301
|
+
block_to_ocr_map = {}
|
258
302
|
object_boxes = []
|
259
303
|
footnote_list = []
|
260
|
-
|
304
|
+
paragraph_title_list = []
|
305
|
+
bottom_text_y_max = 0
|
306
|
+
max_block_area = 0.0
|
307
|
+
doc_title_num = 0
|
308
|
+
|
309
|
+
base_region_bbox = [65535, 65535, 0, 0]
|
310
|
+
layout_det_res = remove_overlap_blocks(
|
311
|
+
layout_det_res,
|
312
|
+
threshold=0.5,
|
313
|
+
smaller=True,
|
314
|
+
)
|
315
|
+
|
316
|
+
# convert formula_res_list to OCRResult format
|
317
|
+
convert_formula_res_to_ocr_format(formula_res_list, overall_ocr_res)
|
261
318
|
|
262
|
-
|
319
|
+
# match layout boxes and ocr boxes and get some information for layout_order_config
|
320
|
+
for box_idx, box_info in enumerate(layout_det_res["boxes"]):
|
263
321
|
box = box_info["coordinate"]
|
264
322
|
label = box_info["label"].lower()
|
265
323
|
object_boxes.append(box)
|
324
|
+
_, _, _, y2 = box
|
325
|
+
|
326
|
+
# update the region box and max_block_area according to the layout boxes
|
327
|
+
base_region_bbox = update_region_box(box, base_region_bbox)
|
328
|
+
max_block_area = max(max_block_area, caculate_bbox_area(box))
|
329
|
+
|
330
|
+
# update_layout_order_config_block_index(layout_order_config, label, box_idx)
|
266
331
|
|
267
332
|
# set the label of footnote to text, when it is above the text boxes
|
268
333
|
if label == "footnote":
|
269
|
-
footnote_list.append(
|
270
|
-
|
271
|
-
|
334
|
+
footnote_list.append(box_idx)
|
335
|
+
elif label == "paragraph_title":
|
336
|
+
paragraph_title_list.append(box_idx)
|
337
|
+
if label == "text":
|
338
|
+
bottom_text_y_max = max(y2, bottom_text_y_max)
|
339
|
+
if label == "doc_title":
|
340
|
+
doc_title_num += 1
|
272
341
|
|
273
342
|
if label not in ["formula", "table", "seal"]:
|
274
|
-
_,
|
343
|
+
_, matched_idxes = get_sub_regions_ocr_res(
|
275
344
|
overall_ocr_res, [box], return_match_idx=True
|
276
345
|
)
|
277
|
-
|
346
|
+
block_to_ocr_map[box_idx] = matched_idxes
|
347
|
+
for matched_idx in matched_idxes:
|
278
348
|
if matched_ocr_dict.get(matched_idx, None) is None:
|
279
|
-
matched_ocr_dict[matched_idx] = [
|
349
|
+
matched_ocr_dict[matched_idx] = [box_idx]
|
280
350
|
else:
|
281
|
-
matched_ocr_dict[matched_idx].append(
|
351
|
+
matched_ocr_dict[matched_idx].append(box_idx)
|
282
352
|
|
353
|
+
# fix the footnote label
|
283
354
|
for footnote_idx in footnote_list:
|
284
355
|
if (
|
285
356
|
layout_det_res["boxes"][footnote_idx]["coordinate"][3]
|
286
|
-
<
|
357
|
+
< bottom_text_y_max
|
287
358
|
):
|
288
359
|
layout_det_res["boxes"][footnote_idx]["label"] = "text"
|
289
360
|
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
361
|
+
# check if there is only one paragraph title and without doc_title
|
362
|
+
only_one_paragraph_title = len(paragraph_title_list) == 1 and doc_title_num == 0
|
363
|
+
if only_one_paragraph_title:
|
364
|
+
paragraph_title_block_area = caculate_bbox_area(
|
365
|
+
layout_det_res["boxes"][paragraph_title_list[0]]["coordinate"]
|
366
|
+
)
|
367
|
+
title_area_max_block_threshold = BLOCK_SETTINGS.get(
|
368
|
+
"title_conversion_area_ratio_threshold", 0.3
|
369
|
+
)
|
370
|
+
if (
|
371
|
+
paragraph_title_block_area
|
372
|
+
> max_block_area * title_area_max_block_threshold
|
373
|
+
):
|
374
|
+
layout_det_res["boxes"][paragraph_title_list[0]]["label"] = "doc_title"
|
375
|
+
|
376
|
+
# Replace the OCR information of the hurdles.
|
377
|
+
for overall_ocr_idx, layout_box_ids in matched_ocr_dict.items():
|
378
|
+
if len(layout_box_ids) > 1:
|
379
|
+
matched_no = 0
|
380
|
+
overall_ocr_box = copy.deepcopy(
|
381
|
+
overall_ocr_res["rec_boxes"][overall_ocr_idx]
|
382
|
+
)
|
383
|
+
overall_ocr_dt_poly = copy.deepcopy(
|
384
|
+
overall_ocr_res["dt_polys"][overall_ocr_idx]
|
385
|
+
)
|
386
|
+
for box_idx in layout_box_ids:
|
387
|
+
layout_box = layout_det_res["boxes"][box_idx]["coordinate"]
|
388
|
+
crop_box = get_bbox_intersection(overall_ocr_box, layout_box)
|
389
|
+
for ocr_idx in block_to_ocr_map[box_idx]:
|
390
|
+
ocr_box = overall_ocr_res["rec_boxes"][ocr_idx]
|
391
|
+
iou = calculate_overlap_ratio(ocr_box, crop_box, "small")
|
392
|
+
if iou > 0.8:
|
393
|
+
overall_ocr_res["rec_texts"][ocr_idx] = ""
|
394
|
+
x1, y1, x2, y2 = [int(i) for i in crop_box]
|
395
|
+
crop_img = np.array(image)[y1:y2, x1:x2]
|
396
|
+
crop_img_rec_res = list(text_rec_model([crop_img]))[0]
|
397
|
+
crop_img_dt_poly = get_bbox_intersection(
|
398
|
+
overall_ocr_dt_poly, layout_box, return_format="poly"
|
399
|
+
)
|
400
|
+
crop_img_rec_score = crop_img_rec_res["rec_score"]
|
401
|
+
crop_img_rec_text = crop_img_rec_res["rec_text"]
|
402
|
+
text_rec_score_thresh = (
|
403
|
+
text_rec_score_thresh
|
404
|
+
if text_rec_score_thresh is not None
|
405
|
+
else (self.general_ocr_pipeline.text_rec_score_thresh)
|
314
406
|
)
|
407
|
+
if crop_img_rec_score >= text_rec_score_thresh:
|
408
|
+
matched_no += 1
|
409
|
+
if matched_no == 1:
|
410
|
+
# the first matched ocr be replaced by the first matched layout box
|
411
|
+
overall_ocr_res["dt_polys"][
|
412
|
+
overall_ocr_idx
|
413
|
+
] = crop_img_dt_poly
|
414
|
+
overall_ocr_res["rec_boxes"][overall_ocr_idx] = crop_box
|
415
|
+
overall_ocr_res["rec_polys"][
|
416
|
+
overall_ocr_idx
|
417
|
+
] = crop_img_dt_poly
|
418
|
+
overall_ocr_res["rec_scores"][
|
419
|
+
overall_ocr_idx
|
420
|
+
] = crop_img_rec_score
|
421
|
+
overall_ocr_res["rec_texts"][
|
422
|
+
overall_ocr_idx
|
423
|
+
] = crop_img_rec_text
|
424
|
+
else:
|
425
|
+
# the other matched ocr be appended to the overall ocr result
|
426
|
+
overall_ocr_res["dt_polys"].append(crop_img_dt_poly)
|
427
|
+
overall_ocr_res["rec_boxes"] = np.vstack(
|
428
|
+
(overall_ocr_res["rec_boxes"], crop_box)
|
429
|
+
)
|
430
|
+
overall_ocr_res["rec_polys"].append(crop_img_dt_poly)
|
431
|
+
overall_ocr_res["rec_scores"].append(crop_img_rec_score)
|
432
|
+
overall_ocr_res["rec_texts"].append(crop_img_rec_text)
|
433
|
+
overall_ocr_res["rec_labels"].append("text")
|
434
|
+
block_to_ocr_map[box_idx].remove(overall_ocr_idx)
|
435
|
+
block_to_ocr_map[box_idx].append(
|
436
|
+
len(overall_ocr_res["rec_texts"]) - 1
|
437
|
+
)
|
438
|
+
|
439
|
+
# use layout bbox to do ocr recognition when there is no matched ocr
|
440
|
+
for layout_box_idx, overall_ocr_idxes in block_to_ocr_map.items():
|
441
|
+
has_text = False
|
442
|
+
for idx in overall_ocr_idxes:
|
443
|
+
if overall_ocr_res["rec_texts"][idx] != "":
|
444
|
+
has_text = True
|
445
|
+
break
|
446
|
+
if not has_text and layout_det_res["boxes"][layout_box_idx][
|
447
|
+
"label"
|
448
|
+
] not in BLOCK_LABEL_MAP.get("vision_labels", []):
|
449
|
+
crop_box = layout_det_res["boxes"][layout_box_idx]["coordinate"]
|
450
|
+
x1, y1, x2, y2 = [int(i) for i in crop_box]
|
451
|
+
crop_img = np.array(image)[y1:y2, x1:x2]
|
452
|
+
crop_img_rec_res = next(text_rec_model([crop_img]))
|
453
|
+
crop_img_dt_poly = get_bbox_intersection(
|
454
|
+
crop_box, crop_box, return_format="poly"
|
315
455
|
)
|
316
|
-
|
317
|
-
|
456
|
+
crop_img_rec_score = crop_img_rec_res["rec_score"]
|
457
|
+
crop_img_rec_text = crop_img_rec_res["rec_text"]
|
458
|
+
text_rec_score_thresh = (
|
459
|
+
text_rec_score_thresh
|
460
|
+
if text_rec_score_thresh is not None
|
461
|
+
else (self.general_ocr_pipeline.text_rec_score_thresh)
|
318
462
|
)
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
463
|
+
if crop_img_rec_score >= text_rec_score_thresh:
|
464
|
+
overall_ocr_res["rec_boxes"] = np.vstack(
|
465
|
+
(overall_ocr_res["rec_boxes"], crop_box)
|
466
|
+
)
|
467
|
+
overall_ocr_res["rec_polys"].append(crop_img_dt_poly)
|
468
|
+
overall_ocr_res["rec_scores"].append(crop_img_rec_score)
|
469
|
+
overall_ocr_res["rec_texts"].append(crop_img_rec_text)
|
470
|
+
overall_ocr_res["rec_labels"].append("text")
|
471
|
+
block_to_ocr_map[layout_box_idx].append(
|
472
|
+
len(overall_ocr_res["rec_texts"]) - 1
|
324
473
|
)
|
325
|
-
del overall_ocr_res["rec_polys"][matched_idx]
|
326
|
-
del overall_ocr_res["rec_scores"][matched_idx]
|
327
474
|
|
328
|
-
|
329
|
-
|
475
|
+
# when there is no layout detection result but there is ocr result, convert ocr detection result to layout detection result
|
476
|
+
if len(layout_det_res["boxes"]) == 0 and len(overall_ocr_res["rec_boxes"]) > 0:
|
477
|
+
for idx, ocr_rec_box in enumerate(overall_ocr_res["rec_boxes"]):
|
478
|
+
base_region_bbox = update_region_box(ocr_rec_box, base_region_bbox)
|
479
|
+
layout_det_res["boxes"].append(
|
480
|
+
{
|
481
|
+
"label": "text",
|
482
|
+
"coordinate": ocr_rec_box,
|
483
|
+
"score": overall_ocr_res["rec_scores"][idx],
|
484
|
+
}
|
485
|
+
)
|
486
|
+
block_to_ocr_map[idx] = [idx]
|
330
487
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
488
|
+
block_bboxes = [box["coordinate"] for box in layout_det_res["boxes"]]
|
489
|
+
region_det_res["boxes"] = sorted(
|
490
|
+
region_det_res["boxes"],
|
491
|
+
key=lambda item: caculate_bbox_area(item["coordinate"]),
|
492
|
+
)
|
493
|
+
if len(region_det_res["boxes"]) == 0:
|
494
|
+
region_det_res["boxes"] = [
|
495
|
+
{
|
496
|
+
"coordinate": base_region_bbox,
|
497
|
+
"label": "SupplementaryRegion",
|
498
|
+
"score": 1,
|
499
|
+
}
|
500
|
+
]
|
501
|
+
region_to_block_map[0] = range(len(block_bboxes))
|
502
|
+
else:
|
503
|
+
block_idxes_set = set(range(len(block_bboxes)))
|
504
|
+
# match block to region
|
505
|
+
for region_idx, region_info in enumerate(region_det_res["boxes"]):
|
506
|
+
matched_idxes = []
|
507
|
+
region_to_block_map[region_idx] = []
|
508
|
+
region_bbox = region_info["coordinate"]
|
509
|
+
for block_idx in block_idxes_set:
|
510
|
+
overlap_ratio = calculate_overlap_ratio(
|
511
|
+
region_bbox, block_bboxes[block_idx], mode="small"
|
512
|
+
)
|
513
|
+
if overlap_ratio > REGION_SETTINGS.get(
|
514
|
+
"match_block_overlap_ratio_threshold", 0.8
|
515
|
+
):
|
516
|
+
region_to_block_map[region_idx].append(block_idx)
|
517
|
+
matched_idxes.append(block_idx)
|
518
|
+
if len(matched_idxes) > 0:
|
519
|
+
for block_idx in matched_idxes:
|
520
|
+
block_idxes_set.remove(block_idx)
|
521
|
+
matched_bboxes = [block_bboxes[idx] for idx in matched_idxes]
|
522
|
+
new_region_bbox = calculate_minimum_enclosing_bbox(matched_bboxes)
|
523
|
+
region_det_res["boxes"][region_idx]["coordinate"] = new_region_bbox
|
524
|
+
# Supplement region when there is no matched block
|
525
|
+
if len(block_idxes_set) > 0:
|
526
|
+
while len(block_idxes_set) > 0:
|
527
|
+
matched_idxes = []
|
528
|
+
unmatched_bboxes = [block_bboxes[idx] for idx in block_idxes_set]
|
529
|
+
supplement_region_bbox = calculate_minimum_enclosing_bbox(
|
530
|
+
unmatched_bboxes
|
531
|
+
)
|
532
|
+
# check if the new region bbox is overlapped with other region bbox, if have, then shrink the new region bbox
|
533
|
+
for region_info in region_det_res["boxes"]:
|
534
|
+
region_bbox = region_info["coordinate"]
|
535
|
+
overlap_ratio = calculate_overlap_ratio(
|
536
|
+
supplement_region_bbox, region_bbox
|
537
|
+
)
|
538
|
+
if overlap_ratio > 0:
|
539
|
+
supplement_region_bbox, matched_idxes = (
|
540
|
+
shrink_supplement_region_bbox(
|
541
|
+
supplement_region_bbox,
|
542
|
+
region_bbox,
|
543
|
+
image.shape[1],
|
544
|
+
image.shape[0],
|
545
|
+
block_idxes_set,
|
546
|
+
block_bboxes,
|
547
|
+
)
|
548
|
+
)
|
549
|
+
if len(matched_idxes) == 0:
|
550
|
+
matched_idxes = list(block_idxes_set)
|
551
|
+
region_idx = len(region_det_res["boxes"])
|
552
|
+
region_to_block_map[region_idx] = list(matched_idxes)
|
553
|
+
for block_idx in matched_idxes:
|
554
|
+
block_idxes_set.remove(block_idx)
|
555
|
+
region_det_res["boxes"].append(
|
556
|
+
{
|
557
|
+
"coordinate": supplement_region_bbox,
|
558
|
+
"label": "SupplementaryRegion",
|
559
|
+
"score": 1,
|
560
|
+
}
|
561
|
+
)
|
562
|
+
|
563
|
+
region_block_ocr_idx_map = dict(
|
564
|
+
region_to_block_map=region_to_block_map,
|
565
|
+
block_to_ocr_map=block_to_ocr_map,
|
566
|
+
)
|
567
|
+
|
568
|
+
return region_block_ocr_idx_map, region_det_res, layout_det_res
|
569
|
+
|
570
|
+
def sort_line_by_projection(
|
571
|
+
self,
|
572
|
+
line: List[List[Union[List[int], str]]],
|
573
|
+
input_img: np.ndarray,
|
574
|
+
text_rec_model: Any,
|
575
|
+
text_rec_score_thresh: Union[float, None] = None,
|
576
|
+
direction: str = "vertical",
|
577
|
+
) -> None:
|
578
|
+
"""
|
579
|
+
Sort a line of text spans based on their vertical position within the layout bounding box.
|
580
|
+
|
581
|
+
Args:
|
582
|
+
line (list): A list of spans, where each span is a list containing a bounding box and text.
|
583
|
+
input_img (ndarray): The input image used for OCR.
|
584
|
+
general_ocr_pipeline (Any): The general OCR pipeline used for text recognition.
|
585
|
+
|
586
|
+
Returns:
|
587
|
+
list: The sorted line of text spans.
|
588
|
+
"""
|
589
|
+
sort_index = 0 if direction == "horizontal" else 1
|
590
|
+
splited_boxes = split_boxes_by_projection(line, direction)
|
591
|
+
splited_lines = []
|
592
|
+
if len(line) != len(splited_boxes):
|
593
|
+
splited_boxes.sort(key=lambda span: span[0][sort_index])
|
594
|
+
for span in splited_boxes:
|
595
|
+
bbox, text, label = span
|
596
|
+
if label == "text":
|
597
|
+
crop_img = input_img[
|
598
|
+
int(bbox[1]) : int(bbox[3]),
|
599
|
+
int(bbox[0]) : int(bbox[2]),
|
600
|
+
]
|
601
|
+
crop_img_rec_res = list(text_rec_model([crop_img]))[0]
|
602
|
+
crop_img_rec_score = crop_img_rec_res["rec_score"]
|
603
|
+
crop_img_rec_text = crop_img_rec_res["rec_text"]
|
604
|
+
text = (
|
605
|
+
crop_img_rec_text
|
606
|
+
if crop_img_rec_score >= text_rec_score_thresh
|
607
|
+
else ""
|
335
608
|
)
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
609
|
+
span[1] = text
|
610
|
+
|
611
|
+
splited_lines.append(span)
|
612
|
+
else:
|
613
|
+
splited_lines = line
|
614
|
+
|
615
|
+
return splited_lines
|
616
|
+
|
617
|
+
def get_block_rec_content(
|
618
|
+
self,
|
619
|
+
image: list,
|
620
|
+
ocr_rec_res: dict,
|
621
|
+
block: LayoutParsingBlock,
|
622
|
+
text_rec_model: Any,
|
623
|
+
text_rec_score_thresh: Union[float, None] = None,
|
624
|
+
) -> str:
|
625
|
+
|
626
|
+
if len(ocr_rec_res["rec_texts"]) == 0:
|
627
|
+
block.content = ""
|
628
|
+
return block
|
629
|
+
|
630
|
+
lines, text_direction, text_line_height = group_boxes_into_lines(
|
631
|
+
ocr_rec_res,
|
632
|
+
LINE_SETTINGS.get("line_height_iou_threshold", 0.8),
|
633
|
+
)
|
634
|
+
|
635
|
+
# format line
|
636
|
+
text_lines = []
|
637
|
+
need_new_line_num = 0
|
638
|
+
# words start coordinate and stop coordinate in the line
|
639
|
+
words_start_index = 0 if text_direction == "horizontal" else 1
|
640
|
+
words_stop_index = words_start_index + 2
|
641
|
+
lines_start_index = 1 if text_direction == "horizontal" else 3
|
642
|
+
line_width_list = []
|
643
|
+
|
644
|
+
if block.label == "reference":
|
645
|
+
rec_boxes = ocr_rec_res["boxes"]
|
646
|
+
block_start_coordinate = min([box[words_start_index] for box in rec_boxes])
|
647
|
+
block_stop_coordinate = max([box[words_stop_index] for box in rec_boxes])
|
648
|
+
else:
|
649
|
+
block_start_coordinate = block.bbox[words_start_index]
|
650
|
+
block_stop_coordinate = block.bbox[words_stop_index]
|
651
|
+
|
652
|
+
for idx, line in enumerate(lines):
|
653
|
+
line.sort(
|
654
|
+
key=lambda span: (
|
655
|
+
span[0][words_start_index] // 2,
|
656
|
+
(
|
657
|
+
span[0][lines_start_index]
|
658
|
+
if text_direction == "horizontal"
|
659
|
+
else -span[0][lines_start_index]
|
660
|
+
),
|
661
|
+
)
|
662
|
+
)
|
663
|
+
|
664
|
+
line_width = line[-1][0][words_stop_index] - line[0][0][words_start_index]
|
665
|
+
line_width_list.append(line_width)
|
666
|
+
# merge formula and text
|
667
|
+
ocr_labels = [span[2] for span in line]
|
668
|
+
if "formula" in ocr_labels:
|
669
|
+
line = self.sort_line_by_projection(
|
670
|
+
line, image, text_rec_model, text_rec_score_thresh, text_direction
|
671
|
+
)
|
672
|
+
|
673
|
+
line_text, need_new_line = format_line(
|
674
|
+
line,
|
675
|
+
text_direction,
|
676
|
+
np.max(line_width_list),
|
677
|
+
block_start_coordinate,
|
678
|
+
block_stop_coordinate,
|
679
|
+
line_gap_limit=text_line_height * 1.5,
|
680
|
+
block_label=block.label,
|
681
|
+
)
|
682
|
+
if need_new_line:
|
683
|
+
need_new_line_num += 1
|
684
|
+
if idx == 0:
|
685
|
+
line_start_coordinate = line[0][0][0]
|
686
|
+
block.seg_start_coordinate = line_start_coordinate
|
687
|
+
elif idx == len(lines) - 1:
|
688
|
+
line_end_coordinate = line[-1][0][2]
|
689
|
+
block.seg_end_coordinate = line_end_coordinate
|
690
|
+
text_lines.append(line_text)
|
691
|
+
|
692
|
+
delim = LINE_SETTINGS["delimiter_map"].get(block.label, "")
|
693
|
+
if need_new_line_num > len(text_lines) * 0.5 and delim == "":
|
694
|
+
text_lines = [text.replace("\n", "") for text in text_lines]
|
695
|
+
delim = "\n"
|
696
|
+
content = delim.join(text_lines)
|
697
|
+
block.content = content
|
698
|
+
block.num_of_lines = len(text_lines)
|
699
|
+
block.direction = text_direction
|
700
|
+
block.text_line_height = text_line_height
|
701
|
+
block.text_line_width = np.mean(line_width_list)
|
702
|
+
|
703
|
+
return block
|
704
|
+
|
705
|
+
def get_layout_parsing_blocks(
|
706
|
+
self,
|
707
|
+
image: list,
|
708
|
+
region_block_ocr_idx_map: dict,
|
709
|
+
region_det_res: DetResult,
|
710
|
+
overall_ocr_res: OCRResult,
|
711
|
+
layout_det_res: DetResult,
|
712
|
+
table_res_list: list,
|
713
|
+
seal_res_list: list,
|
714
|
+
chart_res_list: list,
|
715
|
+
text_rec_model: Any,
|
716
|
+
text_rec_score_thresh: Union[float, None] = None,
|
717
|
+
) -> list:
|
718
|
+
"""
|
719
|
+
Extract structured information from OCR and layout detection results.
|
720
|
+
|
721
|
+
Args:
|
722
|
+
image (list): The input image.
|
723
|
+
overall_ocr_res (OCRResult): An object containing the overall OCR results, including detected text boxes and recognized text. The structure is expected to have:
|
724
|
+
- "input_img": The image on which OCR was performed.
|
725
|
+
- "dt_boxes": A list of detected text box coordinates.
|
726
|
+
- "rec_texts": A list of recognized text corresponding to the detected boxes.
|
727
|
+
|
728
|
+
layout_det_res (DetResult): An object containing the layout detection results, including detected layout boxes and their labels. The structure is expected to have:
|
729
|
+
- "boxes": A list of dictionaries with keys "coordinate" for box coordinates and "block_label" for the type of content.
|
730
|
+
|
731
|
+
table_res_list (list): A list of table detection results, where each item is a dictionary containing:
|
732
|
+
- "block_bbox": The bounding box of the table layout.
|
733
|
+
- "pred_html": The predicted HTML representation of the table.
|
734
|
+
|
735
|
+
seal_res_list (List): A list of seal detection results. The details of each item depend on the specific application context.
|
736
|
+
text_rec_model (Any): A model for text recognition.
|
737
|
+
text_rec_score_thresh (Union[float, None]): The minimum score required for a recognized character to be considered valid. If None, use the default value specified during initialization. Default is None.
|
738
|
+
|
739
|
+
Returns:
|
740
|
+
list: A list of structured boxes where each item is a dictionary containing:
|
741
|
+
- "block_label": The label of the content (e.g., 'table', 'chart', 'image').
|
742
|
+
- The label as a key with either table HTML or image data and text.
|
743
|
+
- "block_bbox": The coordinates of the layout box.
|
744
|
+
"""
|
745
|
+
|
746
|
+
table_index = 0
|
747
|
+
seal_index = 0
|
748
|
+
chart_index = 0
|
749
|
+
layout_parsing_blocks: List[LayoutParsingBlock] = []
|
750
|
+
|
751
|
+
for box_idx, box_info in enumerate(layout_det_res["boxes"]):
|
752
|
+
|
753
|
+
label = box_info["label"]
|
754
|
+
block_bbox = box_info["coordinate"]
|
755
|
+
rec_res = {"boxes": [], "rec_texts": [], "rec_labels": []}
|
756
|
+
|
757
|
+
block = LayoutParsingBlock(label=label, bbox=block_bbox)
|
758
|
+
|
759
|
+
if label == "table" and len(table_res_list) > 0:
|
760
|
+
block.content = table_res_list[table_index]["pred_html"]
|
761
|
+
table_index += 1
|
762
|
+
elif label == "seal" and len(seal_res_list) > 0:
|
763
|
+
block.content = "\n".join(seal_res_list[seal_index]["rec_texts"])
|
764
|
+
seal_index += 1
|
765
|
+
elif label == "chart" and len(chart_res_list) > 0:
|
766
|
+
block.content = chart_res_list[chart_index]
|
767
|
+
chart_index += 1
|
768
|
+
else:
|
769
|
+
if label == "formula":
|
770
|
+
_, ocr_idx_list = get_sub_regions_ocr_res(
|
771
|
+
overall_ocr_res, [block_bbox], return_match_idx=True
|
772
|
+
)
|
773
|
+
region_block_ocr_idx_map["block_to_ocr_map"][box_idx] = ocr_idx_list
|
774
|
+
else:
|
775
|
+
ocr_idx_list = region_block_ocr_idx_map["block_to_ocr_map"].get(
|
776
|
+
box_idx, []
|
777
|
+
)
|
778
|
+
for box_no in ocr_idx_list:
|
779
|
+
rec_res["boxes"].append(overall_ocr_res["rec_boxes"][box_no])
|
780
|
+
rec_res["rec_texts"].append(
|
781
|
+
overall_ocr_res["rec_texts"][box_no],
|
782
|
+
)
|
783
|
+
rec_res["rec_labels"].append(
|
784
|
+
overall_ocr_res["rec_labels"][box_no],
|
785
|
+
)
|
786
|
+
block = self.get_block_rec_content(
|
787
|
+
image=image,
|
788
|
+
block=block,
|
789
|
+
ocr_rec_res=rec_res,
|
790
|
+
text_rec_model=text_rec_model,
|
791
|
+
text_rec_score_thresh=text_rec_score_thresh,
|
792
|
+
)
|
793
|
+
|
794
|
+
if (
|
795
|
+
label
|
796
|
+
in ["seal", "table", "formula", "chart"]
|
797
|
+
+ BLOCK_LABEL_MAP["image_labels"]
|
798
|
+
):
|
799
|
+
x_min, y_min, x_max, y_max = list(map(int, block_bbox))
|
800
|
+
img_path = (
|
801
|
+
f"imgs/img_in_{block.label}_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
|
802
|
+
)
|
803
|
+
img = Image.fromarray(image[y_min:y_max, x_min:x_max, ::-1])
|
804
|
+
block.image = {"path": img_path, "img": img}
|
805
|
+
|
806
|
+
layout_parsing_blocks.append(block)
|
807
|
+
|
808
|
+
region_list: List[LayoutParsingRegion] = []
|
809
|
+
for region_idx, region_info in enumerate(region_det_res["boxes"]):
|
810
|
+
region_bbox = region_info["coordinate"]
|
811
|
+
region_blocks = [
|
812
|
+
layout_parsing_blocks[idx]
|
813
|
+
for idx in region_block_ocr_idx_map["region_to_block_map"][region_idx]
|
347
814
|
]
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
815
|
+
region = LayoutParsingRegion(
|
816
|
+
bbox=region_bbox,
|
817
|
+
blocks=region_blocks,
|
818
|
+
image_shape=image.shape[:2],
|
819
|
+
)
|
820
|
+
region_list.append(region)
|
821
|
+
|
822
|
+
region_list = sorted(
|
823
|
+
region_list,
|
824
|
+
key=lambda r: (r.weighted_distance),
|
825
|
+
)
|
826
|
+
|
827
|
+
return region_list
|
828
|
+
|
829
|
+
def get_layout_parsing_res(
|
830
|
+
self,
|
831
|
+
image: list,
|
832
|
+
region_det_res: DetResult,
|
833
|
+
layout_det_res: DetResult,
|
834
|
+
overall_ocr_res: OCRResult,
|
835
|
+
table_res_list: list,
|
836
|
+
seal_res_list: list,
|
837
|
+
chart_res_list: list,
|
838
|
+
formula_res_list: list,
|
839
|
+
text_rec_score_thresh: Union[float, None] = None,
|
840
|
+
) -> list:
|
841
|
+
"""
|
842
|
+
Retrieves the layout parsing result based on the layout detection result, OCR result, and other recognition results.
|
843
|
+
Args:
|
844
|
+
image (list): The input image.
|
845
|
+
layout_det_res (DetResult): The detection result containing the layout information of the document.
|
846
|
+
overall_ocr_res (OCRResult): The overall OCR result containing text information.
|
847
|
+
table_res_list (list): A list of table recognition results.
|
848
|
+
seal_res_list (list): A list of seal recognition results.
|
849
|
+
formula_res_list (list): A list of formula recognition results.
|
850
|
+
text_rec_score_thresh (Optional[float], optional): The score threshold for text recognition. Defaults to None.
|
851
|
+
Returns:
|
852
|
+
list: A list of dictionaries representing the layout parsing result.
|
853
|
+
"""
|
854
|
+
|
855
|
+
# Standardize data
|
856
|
+
region_block_ocr_idx_map, region_det_res, layout_det_res = (
|
857
|
+
self.standardized_data(
|
858
|
+
image=image,
|
859
|
+
region_det_res=region_det_res,
|
860
|
+
layout_det_res=layout_det_res,
|
861
|
+
overall_ocr_res=overall_ocr_res,
|
862
|
+
formula_res_list=formula_res_list,
|
863
|
+
text_rec_model=self.general_ocr_pipeline.text_rec_model,
|
864
|
+
text_rec_score_thresh=text_rec_score_thresh,
|
352
865
|
)
|
353
|
-
|
354
|
-
overall_ocr_res["rec_polys"].append(poly_points)
|
355
|
-
overall_ocr_res["rec_scores"].append(1)
|
866
|
+
)
|
356
867
|
|
357
|
-
|
358
|
-
|
868
|
+
# Format layout parsing block
|
869
|
+
region_list = self.get_layout_parsing_blocks(
|
870
|
+
image=image,
|
871
|
+
region_block_ocr_idx_map=region_block_ocr_idx_map,
|
872
|
+
region_det_res=region_det_res,
|
359
873
|
overall_ocr_res=overall_ocr_res,
|
360
874
|
layout_det_res=layout_det_res,
|
361
875
|
table_res_list=table_res_list,
|
362
876
|
seal_res_list=seal_res_list,
|
877
|
+
chart_res_list=chart_res_list,
|
878
|
+
text_rec_model=self.general_ocr_pipeline.text_rec_model,
|
879
|
+
text_rec_score_thresh=self.general_ocr_pipeline.text_rec_score_thresh,
|
363
880
|
)
|
364
881
|
|
882
|
+
parsing_res_list = []
|
883
|
+
for region in region_list:
|
884
|
+
parsing_res_list.extend(region.sort())
|
885
|
+
|
886
|
+
index = 1
|
887
|
+
for block in parsing_res_list:
|
888
|
+
if block.label in BLOCK_LABEL_MAP["visualize_index_labels"]:
|
889
|
+
block.order_index = index
|
890
|
+
index += 1
|
891
|
+
|
365
892
|
return parsing_res_list
|
366
893
|
|
367
894
|
def get_model_settings(
|
368
895
|
self,
|
369
896
|
use_doc_orientation_classify: Union[bool, None],
|
370
897
|
use_doc_unwarping: Union[bool, None],
|
371
|
-
use_general_ocr: Union[bool, None],
|
372
898
|
use_seal_recognition: Union[bool, None],
|
373
899
|
use_table_recognition: Union[bool, None],
|
374
900
|
use_formula_recognition: Union[bool, None],
|
901
|
+
use_chart_recognition: Union[bool, None],
|
902
|
+
use_region_detection: Union[bool, None],
|
375
903
|
) -> dict:
|
376
904
|
"""
|
377
905
|
Get the model settings based on the provided parameters or default values.
|
@@ -379,7 +907,6 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
379
907
|
Args:
|
380
908
|
use_doc_orientation_classify (Union[bool, None]): Enables document orientation classification if True. Defaults to system setting if None.
|
381
909
|
use_doc_unwarping (Union[bool, None]): Enables document unwarping if True. Defaults to system setting if None.
|
382
|
-
use_general_ocr (Union[bool, None]): Enables general OCR if True. Defaults to system setting if None.
|
383
910
|
use_seal_recognition (Union[bool, None]): Enables seal recognition if True. Defaults to system setting if None.
|
384
911
|
use_table_recognition (Union[bool, None]): Enables table recognition if True. Defaults to system setting if None.
|
385
912
|
use_formula_recognition (Union[bool, None]): Enables formula recognition if True. Defaults to system setting if None.
|
@@ -396,9 +923,6 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
396
923
|
else:
|
397
924
|
use_doc_preprocessor = False
|
398
925
|
|
399
|
-
if use_general_ocr is None:
|
400
|
-
use_general_ocr = self.use_general_ocr
|
401
|
-
|
402
926
|
if use_seal_recognition is None:
|
403
927
|
use_seal_recognition = self.use_seal_recognition
|
404
928
|
|
@@ -408,24 +932,32 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
408
932
|
if use_formula_recognition is None:
|
409
933
|
use_formula_recognition = self.use_formula_recognition
|
410
934
|
|
935
|
+
if use_region_detection is None:
|
936
|
+
use_region_detection = self.use_region_detection
|
937
|
+
|
938
|
+
if use_chart_recognition is None:
|
939
|
+
use_chart_recognition = self.use_chart_recognition
|
940
|
+
|
411
941
|
return dict(
|
412
942
|
use_doc_preprocessor=use_doc_preprocessor,
|
413
|
-
use_general_ocr=use_general_ocr,
|
414
943
|
use_seal_recognition=use_seal_recognition,
|
415
944
|
use_table_recognition=use_table_recognition,
|
416
945
|
use_formula_recognition=use_formula_recognition,
|
946
|
+
use_chart_recognition=use_chart_recognition,
|
947
|
+
use_region_detection=use_region_detection,
|
417
948
|
)
|
418
949
|
|
419
950
|
def predict(
|
420
951
|
self,
|
421
952
|
input: Union[str, list[str], np.ndarray, list[np.ndarray]],
|
422
|
-
use_doc_orientation_classify: Union[bool, None] =
|
423
|
-
use_doc_unwarping: Union[bool, None] =
|
953
|
+
use_doc_orientation_classify: Union[bool, None] = False,
|
954
|
+
use_doc_unwarping: Union[bool, None] = False,
|
424
955
|
use_textline_orientation: Optional[bool] = None,
|
425
|
-
use_general_ocr: Union[bool, None] = None,
|
426
956
|
use_seal_recognition: Union[bool, None] = None,
|
427
957
|
use_table_recognition: Union[bool, None] = None,
|
428
958
|
use_formula_recognition: Union[bool, None] = None,
|
959
|
+
use_chart_recognition: Union[bool, None] = False,
|
960
|
+
use_region_detection: Union[bool, None] = None,
|
429
961
|
layout_threshold: Optional[Union[float, dict]] = None,
|
430
962
|
layout_nms: Optional[bool] = None,
|
431
963
|
layout_unclip_ratio: Optional[Union[float, Tuple[float, float], dict]] = None,
|
@@ -442,7 +974,10 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
442
974
|
seal_det_box_thresh: Union[float, None] = None,
|
443
975
|
seal_det_unclip_ratio: Union[float, None] = None,
|
444
976
|
seal_rec_score_thresh: Union[float, None] = None,
|
445
|
-
|
977
|
+
use_wired_table_cells_trans_to_html: bool = False,
|
978
|
+
use_wireless_table_cells_trans_to_html: bool = False,
|
979
|
+
use_table_orientation_classify: bool = True,
|
980
|
+
use_ocr_results_with_table_cells: bool = True,
|
446
981
|
use_e2e_wired_table_rec_model: bool = False,
|
447
982
|
use_e2e_wireless_table_rec_model: bool = True,
|
448
983
|
**kwargs,
|
@@ -454,10 +989,10 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
454
989
|
use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
|
455
990
|
use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
|
456
991
|
use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
|
457
|
-
use_general_ocr (Optional[bool]): Whether to use general OCR.
|
458
992
|
use_seal_recognition (Optional[bool]): Whether to use seal recognition.
|
459
993
|
use_table_recognition (Optional[bool]): Whether to use table recognition.
|
460
994
|
use_formula_recognition (Optional[bool]): Whether to use formula recognition.
|
995
|
+
use_region_detection (Optional[bool]): Whether to use region detection.
|
461
996
|
layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
|
462
997
|
layout_nms (bool, optional): Whether to use layout-aware NMS. Defaults to False.
|
463
998
|
layout_unclip_ratio (Optional[Union[float, Tuple[float, float]]], optional): The ratio of unclipping the bounding box.
|
@@ -478,7 +1013,10 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
478
1013
|
seal_det_box_thresh (Optional[float]): Threshold for seal detection boxes.
|
479
1014
|
seal_det_unclip_ratio (Optional[float]): Ratio for unclipping seal detection boxes.
|
480
1015
|
seal_rec_score_thresh (Optional[float]): Score threshold for seal recognition.
|
481
|
-
|
1016
|
+
use_wired_table_cells_trans_to_html (bool): Whether to use wired table cells trans to HTML.
|
1017
|
+
use_wireless_table_cells_trans_to_html (bool): Whether to use wireless table cells trans to HTML.
|
1018
|
+
use_table_orientation_classify (bool): Whether to use table orientation classification.
|
1019
|
+
use_ocr_results_with_table_cells (bool): Whether to use OCR results processed by table cells.
|
482
1020
|
use_e2e_wired_table_rec_model (bool): Whether to use end-to-end wired table recognition model.
|
483
1021
|
use_e2e_wireless_table_rec_model (bool): Whether to use end-to-end wireless table recognition model.
|
484
1022
|
**kwargs (Any): Additional settings to extend functionality.
|
@@ -490,150 +1028,204 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
490
1028
|
model_settings = self.get_model_settings(
|
491
1029
|
use_doc_orientation_classify,
|
492
1030
|
use_doc_unwarping,
|
493
|
-
use_general_ocr,
|
494
1031
|
use_seal_recognition,
|
495
1032
|
use_table_recognition,
|
496
1033
|
use_formula_recognition,
|
1034
|
+
use_chart_recognition,
|
1035
|
+
use_region_detection,
|
497
1036
|
)
|
498
1037
|
|
499
1038
|
if not self.check_model_settings_valid(model_settings):
|
500
1039
|
yield {"error": "the input params for model settings are invalid!"}
|
501
1040
|
|
502
1041
|
for batch_data in self.batch_sampler(input):
|
503
|
-
|
1042
|
+
image_arrays = self.img_reader(batch_data.instances)
|
504
1043
|
|
505
1044
|
if model_settings["use_doc_preprocessor"]:
|
506
|
-
|
1045
|
+
doc_preprocessor_results = list(
|
507
1046
|
self.doc_preprocessor_pipeline(
|
508
|
-
|
1047
|
+
image_arrays,
|
509
1048
|
use_doc_orientation_classify=use_doc_orientation_classify,
|
510
1049
|
use_doc_unwarping=use_doc_unwarping,
|
511
|
-
)
|
1050
|
+
)
|
512
1051
|
)
|
513
1052
|
else:
|
514
|
-
|
1053
|
+
doc_preprocessor_results = [{"output_img": arr} for arr in image_arrays]
|
515
1054
|
|
516
|
-
|
1055
|
+
doc_preprocessor_images = [
|
1056
|
+
item["output_img"] for item in doc_preprocessor_results
|
1057
|
+
]
|
517
1058
|
|
518
|
-
|
1059
|
+
layout_det_results = list(
|
519
1060
|
self.layout_det_model(
|
520
|
-
|
1061
|
+
doc_preprocessor_images,
|
521
1062
|
threshold=layout_threshold,
|
522
1063
|
layout_nms=layout_nms,
|
523
1064
|
layout_unclip_ratio=layout_unclip_ratio,
|
524
1065
|
layout_merge_bboxes_mode=layout_merge_bboxes_mode,
|
525
1066
|
)
|
526
1067
|
)
|
527
|
-
imgs_in_doc =
|
1068
|
+
imgs_in_doc = [
|
1069
|
+
gather_imgs(img, res["boxes"])
|
1070
|
+
for img, res in zip(doc_preprocessor_images, layout_det_results)
|
1071
|
+
]
|
1072
|
+
|
1073
|
+
if model_settings["use_region_detection"]:
|
1074
|
+
region_det_results = list(
|
1075
|
+
self.region_detection_model(
|
1076
|
+
doc_preprocessor_images,
|
1077
|
+
layout_nms=True,
|
1078
|
+
layout_merge_bboxes_mode="small",
|
1079
|
+
),
|
1080
|
+
)
|
1081
|
+
else:
|
1082
|
+
region_det_results = [{"boxes": []} for _ in doc_preprocessor_images]
|
528
1083
|
|
529
1084
|
if model_settings["use_formula_recognition"]:
|
530
|
-
formula_res_all =
|
1085
|
+
formula_res_all = list(
|
531
1086
|
self.formula_recognition_pipeline(
|
532
|
-
|
1087
|
+
doc_preprocessor_images,
|
533
1088
|
use_layout_detection=False,
|
534
1089
|
use_doc_orientation_classify=False,
|
535
1090
|
use_doc_unwarping=False,
|
536
|
-
layout_det_res=
|
1091
|
+
layout_det_res=layout_det_results,
|
537
1092
|
),
|
538
1093
|
)
|
539
|
-
|
1094
|
+
formula_res_lists = [
|
1095
|
+
item["formula_res_list"] for item in formula_res_all
|
1096
|
+
]
|
540
1097
|
else:
|
541
|
-
|
1098
|
+
formula_res_lists = [[] for _ in doc_preprocessor_images]
|
542
1099
|
|
543
|
-
for
|
544
|
-
|
545
|
-
doc_preprocessor_image[y_min:y_max, x_min:x_max, :] = 255.0
|
546
|
-
|
547
|
-
if (
|
548
|
-
model_settings["use_general_ocr"]
|
549
|
-
or model_settings["use_table_recognition"]
|
1100
|
+
for doc_preprocessor_image, formula_res_list in zip(
|
1101
|
+
doc_preprocessor_images, formula_res_lists
|
550
1102
|
):
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
1103
|
+
for formula_res in formula_res_list:
|
1104
|
+
x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
|
1105
|
+
doc_preprocessor_image[y_min:y_max, x_min:x_max, :] = 255.0
|
1106
|
+
|
1107
|
+
overall_ocr_results = list(
|
1108
|
+
self.general_ocr_pipeline(
|
1109
|
+
doc_preprocessor_images,
|
1110
|
+
use_textline_orientation=use_textline_orientation,
|
1111
|
+
text_det_limit_side_len=text_det_limit_side_len,
|
1112
|
+
text_det_limit_type=text_det_limit_type,
|
1113
|
+
text_det_thresh=text_det_thresh,
|
1114
|
+
text_det_box_thresh=text_det_box_thresh,
|
1115
|
+
text_det_unclip_ratio=text_det_unclip_ratio,
|
1116
|
+
text_rec_score_thresh=text_rec_score_thresh,
|
1117
|
+
),
|
1118
|
+
)
|
565
1119
|
|
566
|
-
overall_ocr_res
|
1120
|
+
for overall_ocr_res in overall_ocr_results:
|
1121
|
+
overall_ocr_res["rec_labels"] = ["text"] * len(
|
1122
|
+
overall_ocr_res["rec_texts"]
|
1123
|
+
)
|
567
1124
|
|
568
1125
|
if model_settings["use_table_recognition"]:
|
569
|
-
|
570
|
-
for
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
1126
|
+
table_res_lists = []
|
1127
|
+
for (
|
1128
|
+
layout_det_res,
|
1129
|
+
doc_preprocessor_image,
|
1130
|
+
overall_ocr_res,
|
1131
|
+
formula_res_list,
|
1132
|
+
imgs_in_doc_for_img,
|
1133
|
+
) in zip(
|
1134
|
+
layout_det_results,
|
1135
|
+
doc_preprocessor_images,
|
1136
|
+
overall_ocr_results,
|
1137
|
+
formula_res_lists,
|
1138
|
+
imgs_in_doc,
|
1139
|
+
):
|
1140
|
+
table_contents_for_img = copy.deepcopy(overall_ocr_res)
|
1141
|
+
for formula_res in formula_res_list:
|
1142
|
+
x_min, y_min, x_max, y_max = list(
|
1143
|
+
map(int, formula_res["dt_polys"])
|
1144
|
+
)
|
1145
|
+
poly_points = [
|
1146
|
+
(x_min, y_min),
|
1147
|
+
(x_max, y_min),
|
1148
|
+
(x_max, y_max),
|
1149
|
+
(x_min, y_max),
|
1150
|
+
]
|
1151
|
+
table_contents_for_img["dt_polys"].append(poly_points)
|
1152
|
+
rec_formula = formula_res["rec_formula"]
|
1153
|
+
if not rec_formula.startswith("$") or not rec_formula.endswith(
|
1154
|
+
"$"
|
1155
|
+
):
|
1156
|
+
rec_formula = f"${rec_formula}$"
|
1157
|
+
table_contents_for_img["rec_texts"].append(f"{rec_formula}")
|
1158
|
+
if table_contents_for_img["rec_boxes"].size == 0:
|
1159
|
+
table_contents_for_img["rec_boxes"] = np.array(
|
1160
|
+
[formula_res["dt_polys"]]
|
1161
|
+
)
|
1162
|
+
else:
|
1163
|
+
table_contents_for_img["rec_boxes"] = np.vstack(
|
1164
|
+
(
|
1165
|
+
table_contents_for_img["rec_boxes"],
|
1166
|
+
[formula_res["dt_polys"]],
|
1167
|
+
)
|
1168
|
+
)
|
1169
|
+
table_contents_for_img["rec_polys"].append(poly_points)
|
1170
|
+
table_contents_for_img["rec_scores"].append(1)
|
1171
|
+
|
1172
|
+
for img in imgs_in_doc_for_img:
|
1173
|
+
img_path = img["path"]
|
1174
|
+
x_min, y_min, x_max, y_max = img["coordinate"]
|
1175
|
+
poly_points = [
|
1176
|
+
(x_min, y_min),
|
1177
|
+
(x_max, y_min),
|
1178
|
+
(x_max, y_max),
|
1179
|
+
(x_min, y_max),
|
1180
|
+
]
|
1181
|
+
table_contents_for_img["dt_polys"].append(poly_points)
|
1182
|
+
table_contents_for_img["rec_texts"].append(
|
1183
|
+
f'<div style="text-align: center;"><img src="{img_path}" alt="Image" /></div>'
|
1184
|
+
)
|
1185
|
+
if table_contents_for_img["rec_boxes"].size == 0:
|
1186
|
+
table_contents_for_img["rec_boxes"] = np.array(
|
1187
|
+
[img["coordinate"]]
|
1188
|
+
)
|
1189
|
+
else:
|
1190
|
+
table_contents_for_img["rec_boxes"] = np.vstack(
|
1191
|
+
(table_contents_for_img["rec_boxes"], img["coordinate"])
|
1192
|
+
)
|
1193
|
+
table_contents_for_img["rec_polys"].append(poly_points)
|
1194
|
+
table_contents_for_img["rec_scores"].append(img["score"])
|
1195
|
+
|
1196
|
+
table_res_all = list(
|
1197
|
+
self.table_recognition_pipeline(
|
1198
|
+
doc_preprocessor_image,
|
1199
|
+
use_doc_orientation_classify=False,
|
1200
|
+
use_doc_unwarping=False,
|
1201
|
+
use_layout_detection=False,
|
1202
|
+
use_ocr_model=False,
|
1203
|
+
overall_ocr_res=table_contents_for_img,
|
1204
|
+
layout_det_res=layout_det_res,
|
1205
|
+
cell_sort_by_y_projection=True,
|
1206
|
+
use_wired_table_cells_trans_to_html=use_wired_table_cells_trans_to_html,
|
1207
|
+
use_wireless_table_cells_trans_to_html=use_wireless_table_cells_trans_to_html,
|
1208
|
+
use_table_orientation_classify=use_table_orientation_classify,
|
1209
|
+
use_ocr_results_with_table_cells=use_ocr_results_with_table_cells,
|
1210
|
+
use_e2e_wired_table_rec_model=use_e2e_wired_table_rec_model,
|
1211
|
+
use_e2e_wireless_table_rec_model=use_e2e_wireless_table_rec_model,
|
1212
|
+
),
|
584
1213
|
)
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
for img in imgs_in_doc:
|
589
|
-
img_path = img["path"]
|
590
|
-
x_min, y_min, x_max, y_max = img["coordinate"]
|
591
|
-
poly_points = [
|
592
|
-
(x_min, y_min),
|
593
|
-
(x_max, y_min),
|
594
|
-
(x_max, y_max),
|
595
|
-
(x_min, y_max),
|
1214
|
+
single_table_res_lists = [
|
1215
|
+
item["table_res_list"] for item in table_res_all
|
596
1216
|
]
|
597
|
-
|
598
|
-
table_contents["rec_texts"].append(
|
599
|
-
f'<div style="text-align: center;"><img src="{img_path}" alt="Image" /></div>'
|
600
|
-
)
|
601
|
-
if table_contents["rec_boxes"].size == 0:
|
602
|
-
table_contents["rec_boxes"] = np.array([img["coordinate"]])
|
603
|
-
else:
|
604
|
-
table_contents["rec_boxes"] = np.vstack(
|
605
|
-
(table_contents["rec_boxes"], img["coordinate"])
|
606
|
-
)
|
607
|
-
table_contents["rec_polys"].append(poly_points)
|
608
|
-
table_contents["rec_scores"].append(img["score"])
|
609
|
-
|
610
|
-
table_res_all = next(
|
611
|
-
self.table_recognition_pipeline(
|
612
|
-
doc_preprocessor_image,
|
613
|
-
use_doc_orientation_classify=False,
|
614
|
-
use_doc_unwarping=False,
|
615
|
-
use_layout_detection=False,
|
616
|
-
use_ocr_model=False,
|
617
|
-
overall_ocr_res=table_contents,
|
618
|
-
layout_det_res=layout_det_res,
|
619
|
-
cell_sort_by_y_projection=True,
|
620
|
-
use_table_cells_ocr_results=use_table_cells_ocr_results,
|
621
|
-
use_e2e_wired_table_rec_model=use_e2e_wired_table_rec_model,
|
622
|
-
use_e2e_wireless_table_rec_model=use_e2e_wireless_table_rec_model,
|
623
|
-
),
|
624
|
-
)
|
625
|
-
table_res_list = table_res_all["table_res_list"]
|
1217
|
+
table_res_lists.extend(single_table_res_lists)
|
626
1218
|
else:
|
627
|
-
|
1219
|
+
table_res_lists = [[] for _ in doc_preprocessor_images]
|
628
1220
|
|
629
1221
|
if model_settings["use_seal_recognition"]:
|
630
|
-
seal_res_all =
|
1222
|
+
seal_res_all = list(
|
631
1223
|
self.seal_recognition_pipeline(
|
632
|
-
|
1224
|
+
doc_preprocessor_images,
|
633
1225
|
use_doc_orientation_classify=False,
|
634
1226
|
use_doc_unwarping=False,
|
635
1227
|
use_layout_detection=False,
|
636
|
-
layout_det_res=
|
1228
|
+
layout_det_res=layout_det_results,
|
637
1229
|
seal_det_limit_side_len=seal_det_limit_side_len,
|
638
1230
|
seal_det_limit_type=seal_det_limit_type,
|
639
1231
|
seal_det_thresh=seal_det_thresh,
|
@@ -642,46 +1234,85 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
642
1234
|
seal_rec_score_thresh=seal_rec_score_thresh,
|
643
1235
|
),
|
644
1236
|
)
|
645
|
-
|
1237
|
+
seal_res_lists = [item["seal_res_list"] for item in seal_res_all]
|
646
1238
|
else:
|
647
|
-
|
1239
|
+
seal_res_lists = [[] for _ in doc_preprocessor_images]
|
648
1240
|
|
649
|
-
|
1241
|
+
for (
|
1242
|
+
input_path,
|
1243
|
+
page_index,
|
650
1244
|
doc_preprocessor_image,
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
1245
|
+
doc_preprocessor_res,
|
1246
|
+
layout_det_res,
|
1247
|
+
region_det_res,
|
1248
|
+
overall_ocr_res,
|
1249
|
+
table_res_list,
|
1250
|
+
seal_res_list,
|
1251
|
+
formula_res_list,
|
1252
|
+
imgs_in_doc_for_img,
|
1253
|
+
) in zip(
|
1254
|
+
batch_data.input_paths,
|
1255
|
+
batch_data.page_indexes,
|
1256
|
+
doc_preprocessor_images,
|
1257
|
+
doc_preprocessor_results,
|
1258
|
+
layout_det_results,
|
1259
|
+
region_det_results,
|
1260
|
+
overall_ocr_results,
|
1261
|
+
table_res_lists,
|
1262
|
+
seal_res_lists,
|
1263
|
+
formula_res_lists,
|
1264
|
+
imgs_in_doc,
|
1265
|
+
):
|
1266
|
+
chart_res_list = []
|
1267
|
+
if model_settings["use_chart_recognition"]:
|
1268
|
+
chart_imgs_list = []
|
1269
|
+
for bbox in layout_det_res["boxes"]:
|
1270
|
+
if bbox["label"] == "chart":
|
1271
|
+
x_min, y_min, x_max, y_max = bbox["coordinate"]
|
1272
|
+
chart_img = doc_preprocessor_image[
|
1273
|
+
int(y_min) : int(y_max), int(x_min) : int(x_max), :
|
1274
|
+
]
|
1275
|
+
chart_imgs_list.append({"image": chart_img})
|
1276
|
+
|
1277
|
+
for chart_res_batch in self.chart_recognition_model(
|
1278
|
+
input=chart_imgs_list
|
1279
|
+
):
|
1280
|
+
chart_res_list.append(chart_res_batch["result"])
|
1281
|
+
|
1282
|
+
parsing_res_list = self.get_layout_parsing_res(
|
1283
|
+
doc_preprocessor_image,
|
1284
|
+
region_det_res=region_det_res,
|
1285
|
+
layout_det_res=layout_det_res,
|
1286
|
+
overall_ocr_res=overall_ocr_res,
|
1287
|
+
table_res_list=table_res_list,
|
1288
|
+
seal_res_list=seal_res_list,
|
1289
|
+
chart_res_list=chart_res_list,
|
1290
|
+
formula_res_list=formula_res_list,
|
1291
|
+
text_rec_score_thresh=text_rec_score_thresh,
|
1292
|
+
)
|
664
1293
|
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
1294
|
+
for formula_res in formula_res_list:
|
1295
|
+
x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
|
1296
|
+
doc_preprocessor_image[y_min:y_max, x_min:x_max, :] = formula_res[
|
1297
|
+
"input_img"
|
1298
|
+
]
|
670
1299
|
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
1300
|
+
single_img_res = {
|
1301
|
+
"input_path": input_path,
|
1302
|
+
"page_index": page_index,
|
1303
|
+
"doc_preprocessor_res": doc_preprocessor_res,
|
1304
|
+
"layout_det_res": layout_det_res,
|
1305
|
+
"region_det_res": region_det_res,
|
1306
|
+
"overall_ocr_res": overall_ocr_res,
|
1307
|
+
"table_res_list": table_res_list,
|
1308
|
+
"seal_res_list": seal_res_list,
|
1309
|
+
"chart_res_list": chart_res_list,
|
1310
|
+
"formula_res_list": formula_res_list,
|
1311
|
+
"parsing_res_list": parsing_res_list,
|
1312
|
+
"imgs_in_doc": imgs_in_doc_for_img,
|
1313
|
+
"model_settings": model_settings,
|
1314
|
+
}
|
1315
|
+
yield LayoutParsingResultV2(single_img_res)
|
685
1316
|
|
686
1317
|
def concatenate_markdown_pages(self, markdown_list: list) -> tuple:
|
687
1318
|
"""
|
@@ -737,3 +1368,15 @@ class LayoutParsingPipelineV2(BasePipeline):
|
|
737
1368
|
)
|
738
1369
|
|
739
1370
|
return markdown_texts
|
1371
|
+
|
1372
|
+
|
1373
|
+
@pipeline_requires_extra("ocr")
|
1374
|
+
class LayoutParsingPipelineV2(AutoParallelImageSimpleInferencePipeline):
|
1375
|
+
entities = ["PP-StructureV3"]
|
1376
|
+
|
1377
|
+
@property
|
1378
|
+
def _pipeline_cls(self):
|
1379
|
+
return _LayoutParsingPipelineV2
|
1380
|
+
|
1381
|
+
def _get_batch_size(self, config):
|
1382
|
+
return config.get("batch_size", 1)
|