paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff shows the changes between publicly available package versions as they were released to a supported public registry. It is provided for informational purposes only and reflects the packages as they appear in their respective registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +17 -34
- paddlex/__main__.py +1 -1
- paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee-2B.yaml +14 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee-7B.yaml +14 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
- paddlex/configs/modules/open_vocabulary_detection/YOLO-Worldv2-L.yaml +13 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
- paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
- paddlex/configs/pipelines/OCR.yaml +7 -6
- paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
- paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
- paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
- paddlex/configs/pipelines/anomaly_detection.yaml +1 -1
- paddlex/configs/pipelines/doc_understanding.yaml +9 -0
- paddlex/configs/pipelines/formula_recognition.yaml +2 -2
- paddlex/configs/pipelines/layout_parsing.yaml +3 -2
- paddlex/configs/pipelines/seal_recognition.yaml +1 -0
- paddlex/configs/pipelines/table_recognition.yaml +2 -1
- paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
- paddlex/configs/pipelines/ts_anomaly_detection.yaml +1 -1
- paddlex/configs/pipelines/ts_classification.yaml +1 -1
- paddlex/configs/pipelines/ts_forecast.yaml +1 -1
- paddlex/constants.py +17 -0
- paddlex/engine.py +7 -5
- paddlex/hpip_links.html +23 -11
- paddlex/inference/__init__.py +3 -3
- paddlex/inference/common/__init__.py +1 -1
- paddlex/inference/common/batch_sampler/__init__.py +5 -4
- paddlex/inference/common/batch_sampler/audio_batch_sampler.py +5 -6
- paddlex/inference/common/batch_sampler/base_batch_sampler.py +20 -16
- paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +4 -7
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +87 -0
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +45 -60
- paddlex/inference/common/batch_sampler/ts_batch_sampler.py +9 -10
- paddlex/inference/common/batch_sampler/video_batch_sampler.py +2 -22
- paddlex/inference/common/reader/__init__.py +4 -4
- paddlex/inference/common/reader/audio_reader.py +3 -3
- paddlex/inference/common/reader/det_3d_reader.py +7 -5
- paddlex/inference/common/reader/image_reader.py +16 -12
- paddlex/inference/common/reader/ts_reader.py +3 -2
- paddlex/inference/common/reader/video_reader.py +3 -3
- paddlex/inference/common/result/__init__.py +7 -7
- paddlex/inference/common/result/base_cv_result.py +12 -2
- paddlex/inference/common/result/base_result.py +7 -5
- paddlex/inference/common/result/base_ts_result.py +1 -2
- paddlex/inference/common/result/base_video_result.py +2 -2
- paddlex/inference/common/result/mixin.py +31 -25
- paddlex/inference/models/__init__.py +41 -85
- paddlex/inference/models/anomaly_detection/__init__.py +1 -1
- paddlex/inference/models/anomaly_detection/predictor.py +9 -19
- paddlex/inference/models/anomaly_detection/processors.py +9 -2
- paddlex/inference/models/anomaly_detection/result.py +3 -2
- paddlex/inference/models/base/__init__.py +2 -2
- paddlex/inference/models/base/predictor/__init__.py +1 -2
- paddlex/inference/models/base/predictor/base_predictor.py +278 -39
- paddlex/inference/models/common/__init__.py +6 -15
- paddlex/inference/models/common/static_infer.py +724 -251
- paddlex/inference/models/common/tokenizer/__init__.py +7 -3
- paddlex/inference/models/common/tokenizer/bert_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +609 -0
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +9 -7
- paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +438 -0
- paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +85 -77
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +339 -123
- paddlex/inference/models/common/tokenizer/utils.py +1 -1
- paddlex/inference/models/common/tokenizer/vocab.py +8 -8
- paddlex/inference/models/common/ts/__init__.py +1 -1
- paddlex/inference/models/common/ts/funcs.py +13 -6
- paddlex/inference/models/common/ts/processors.py +14 -5
- paddlex/inference/models/common/vision/__init__.py +3 -3
- paddlex/inference/models/common/vision/funcs.py +17 -12
- paddlex/inference/models/common/vision/processors.py +61 -46
- paddlex/inference/models/common/vlm/__init__.py +13 -0
- paddlex/inference/models/common/vlm/activations.py +189 -0
- paddlex/inference/models/common/vlm/bert_padding.py +127 -0
- paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
- paddlex/inference/models/common/vlm/distributed.py +229 -0
- paddlex/inference/models/common/vlm/flash_attn_utils.py +119 -0
- paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
- paddlex/inference/models/common/vlm/generation/__init__.py +34 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +533 -0
- paddlex/inference/models/common/vlm/generation/logits_process.py +730 -0
- paddlex/inference/models/common/vlm/generation/stopping_criteria.py +106 -0
- paddlex/inference/models/common/vlm/generation/utils.py +2162 -0
- paddlex/inference/models/common/vlm/transformers/__init__.py +16 -0
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +1037 -0
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +408 -0
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +1612 -0
- paddlex/inference/models/common/vlm/transformers/model_utils.py +2014 -0
- paddlex/inference/models/common/vlm/transformers/utils.py +178 -0
- paddlex/inference/models/common/vlm/utils.py +109 -0
- paddlex/inference/models/doc_vlm/__init__.py +15 -0
- paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +17 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +2495 -0
- paddlex/inference/models/doc_vlm/predictor.py +253 -0
- paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +17 -0
- paddlex/inference/models/doc_vlm/processors/common.py +561 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +543 -0
- paddlex/inference/models/doc_vlm/result.py +21 -0
- paddlex/inference/models/face_feature/__init__.py +1 -1
- paddlex/inference/models/face_feature/predictor.py +2 -1
- paddlex/inference/models/formula_recognition/__init__.py +1 -1
- paddlex/inference/models/formula_recognition/predictor.py +18 -28
- paddlex/inference/models/formula_recognition/processors.py +126 -97
- paddlex/inference/models/formula_recognition/result.py +43 -35
- paddlex/inference/models/image_classification/__init__.py +1 -1
- paddlex/inference/models/image_classification/predictor.py +9 -19
- paddlex/inference/models/image_classification/processors.py +4 -2
- paddlex/inference/models/image_classification/result.py +4 -3
- paddlex/inference/models/image_feature/__init__.py +1 -1
- paddlex/inference/models/image_feature/predictor.py +9 -19
- paddlex/inference/models/image_feature/processors.py +7 -5
- paddlex/inference/models/image_feature/result.py +2 -3
- paddlex/inference/models/image_multilabel_classification/__init__.py +1 -1
- paddlex/inference/models/image_multilabel_classification/predictor.py +7 -6
- paddlex/inference/models/image_multilabel_classification/processors.py +6 -2
- paddlex/inference/models/image_multilabel_classification/result.py +4 -3
- paddlex/inference/models/image_unwarping/__init__.py +1 -1
- paddlex/inference/models/image_unwarping/predictor.py +8 -16
- paddlex/inference/models/image_unwarping/processors.py +6 -2
- paddlex/inference/models/image_unwarping/result.py +4 -2
- paddlex/inference/models/instance_segmentation/__init__.py +1 -1
- paddlex/inference/models/instance_segmentation/predictor.py +7 -15
- paddlex/inference/models/instance_segmentation/processors.py +4 -7
- paddlex/inference/models/instance_segmentation/result.py +11 -10
- paddlex/inference/models/keypoint_detection/__init__.py +1 -1
- paddlex/inference/models/keypoint_detection/predictor.py +5 -3
- paddlex/inference/models/keypoint_detection/processors.py +11 -3
- paddlex/inference/models/keypoint_detection/result.py +9 -4
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/predictor.py +15 -26
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/processors.py +26 -14
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/result.py +15 -12
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/visualizer_3d.py +77 -39
- paddlex/inference/models/multilingual_speech_recognition/__init__.py +1 -1
- paddlex/inference/models/multilingual_speech_recognition/predictor.py +11 -15
- paddlex/inference/models/multilingual_speech_recognition/processors.py +45 -53
- paddlex/inference/models/multilingual_speech_recognition/result.py +1 -1
- paddlex/inference/models/object_detection/__init__.py +1 -1
- paddlex/inference/models/object_detection/predictor.py +8 -12
- paddlex/inference/models/object_detection/processors.py +63 -33
- paddlex/inference/models/object_detection/result.py +5 -4
- paddlex/inference/models/object_detection/utils.py +3 -1
- paddlex/inference/models/open_vocabulary_detection/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_detection/predictor.py +31 -14
- paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +3 -2
- paddlex/inference/models/open_vocabulary_detection/processors/common.py +114 -0
- paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +19 -8
- paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py +209 -0
- paddlex/inference/models/open_vocabulary_segmentation/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/predictor.py +6 -13
- paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +12 -12
- paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +11 -9
- paddlex/inference/models/semantic_segmentation/__init__.py +1 -1
- paddlex/inference/models/semantic_segmentation/predictor.py +9 -18
- paddlex/inference/models/semantic_segmentation/processors.py +11 -8
- paddlex/inference/models/semantic_segmentation/result.py +4 -3
- paddlex/inference/models/table_structure_recognition/__init__.py +1 -1
- paddlex/inference/models/table_structure_recognition/predictor.py +8 -18
- paddlex/inference/models/table_structure_recognition/processors.py +23 -29
- paddlex/inference/models/table_structure_recognition/result.py +8 -15
- paddlex/inference/models/text_detection/__init__.py +1 -1
- paddlex/inference/models/text_detection/predictor.py +24 -24
- paddlex/inference/models/text_detection/processors.py +116 -44
- paddlex/inference/models/text_detection/result.py +8 -13
- paddlex/inference/models/text_recognition/__init__.py +1 -1
- paddlex/inference/models/text_recognition/predictor.py +11 -19
- paddlex/inference/models/text_recognition/processors.py +27 -13
- paddlex/inference/models/text_recognition/result.py +3 -2
- paddlex/inference/models/ts_anomaly_detection/__init__.py +1 -1
- paddlex/inference/models/ts_anomaly_detection/predictor.py +12 -17
- paddlex/inference/models/ts_anomaly_detection/processors.py +6 -2
- paddlex/inference/models/ts_anomaly_detection/result.py +21 -10
- paddlex/inference/models/ts_classification/__init__.py +1 -1
- paddlex/inference/models/ts_classification/predictor.py +14 -27
- paddlex/inference/models/ts_classification/processors.py +7 -2
- paddlex/inference/models/ts_classification/result.py +21 -12
- paddlex/inference/models/ts_forecasting/__init__.py +1 -1
- paddlex/inference/models/ts_forecasting/predictor.py +13 -18
- paddlex/inference/models/ts_forecasting/processors.py +12 -3
- paddlex/inference/models/ts_forecasting/result.py +24 -11
- paddlex/inference/models/video_classification/__init__.py +1 -1
- paddlex/inference/models/video_classification/predictor.py +9 -15
- paddlex/inference/models/video_classification/processors.py +24 -24
- paddlex/inference/models/video_classification/result.py +7 -3
- paddlex/inference/models/video_detection/__init__.py +1 -1
- paddlex/inference/models/video_detection/predictor.py +8 -15
- paddlex/inference/models/video_detection/processors.py +24 -11
- paddlex/inference/models/video_detection/result.py +10 -5
- paddlex/inference/pipelines/__init__.py +48 -37
- paddlex/inference/pipelines/_parallel.py +172 -0
- paddlex/inference/pipelines/anomaly_detection/__init__.py +1 -1
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +29 -9
- paddlex/inference/pipelines/attribute_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +24 -9
- paddlex/inference/pipelines/attribute_recognition/result.py +10 -8
- paddlex/inference/pipelines/base.py +43 -13
- paddlex/inference/pipelines/components/__init__.py +14 -8
- paddlex/inference/pipelines/components/chat_server/__init__.py +1 -1
- paddlex/inference/pipelines/components/chat_server/base.py +2 -2
- paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +8 -8
- paddlex/inference/pipelines/components/common/__init__.py +5 -4
- paddlex/inference/pipelines/components/common/base_operator.py +2 -1
- paddlex/inference/pipelines/components/common/base_result.py +3 -2
- paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +1 -2
- paddlex/inference/pipelines/components/common/crop_image_regions.py +11 -5
- paddlex/inference/pipelines/components/common/seal_det_warp.py +44 -13
- paddlex/inference/pipelines/components/common/sort_boxes.py +4 -2
- paddlex/inference/pipelines/components/common/warp_image.py +50 -0
- paddlex/inference/pipelines/components/faisser.py +10 -5
- paddlex/inference/pipelines/components/prompt_engineering/__init__.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/base.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +2 -1
- paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +2 -2
- paddlex/inference/pipelines/components/retriever/__init__.py +2 -2
- paddlex/inference/pipelines/components/retriever/base.py +18 -16
- paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +2 -2
- paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +87 -84
- paddlex/inference/pipelines/components/utils/__init__.py +1 -1
- paddlex/inference/pipelines/components/utils/mixin.py +7 -7
- paddlex/inference/pipelines/doc_preprocessor/__init__.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +70 -51
- paddlex/inference/pipelines/doc_preprocessor/result.py +5 -10
- paddlex/inference/pipelines/doc_understanding/__init__.py +15 -0
- paddlex/inference/pipelines/doc_understanding/pipeline.py +71 -0
- paddlex/inference/pipelines/face_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/face_recognition/pipeline.py +3 -1
- paddlex/inference/pipelines/face_recognition/result.py +3 -2
- paddlex/inference/pipelines/formula_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/formula_recognition/pipeline.py +137 -93
- paddlex/inference/pipelines/formula_recognition/result.py +20 -29
- paddlex/inference/pipelines/image_classification/__init__.py +1 -1
- paddlex/inference/pipelines/image_classification/pipeline.py +30 -11
- paddlex/inference/pipelines/image_multilabel_classification/__init__.py +1 -1
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +31 -12
- paddlex/inference/pipelines/instance_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +30 -9
- paddlex/inference/pipelines/keypoint_detection/__init__.py +1 -1
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/layout_parsing/__init__.py +1 -1
- paddlex/inference/pipelines/layout_parsing/pipeline.py +54 -56
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +904 -261
- paddlex/inference/pipelines/layout_parsing/result.py +9 -21
- paddlex/inference/pipelines/layout_parsing/result_v2.py +525 -250
- paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +570 -2004
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
- paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
- paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/pipeline.py +17 -10
- paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +17 -6
- paddlex/inference/pipelines/object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/object_detection/pipeline.py +29 -9
- paddlex/inference/pipelines/ocr/__init__.py +1 -1
- paddlex/inference/pipelines/ocr/pipeline.py +151 -77
- paddlex/inference/pipelines/ocr/result.py +31 -24
- paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +1 -1
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +17 -6
- paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +17 -6
- paddlex/inference/pipelines/pp_chatocr/__init__.py +1 -1
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +14 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +22 -14
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +34 -16
- paddlex/inference/pipelines/pp_shitu_v2/__init__.py +1 -1
- paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +12 -8
- paddlex/inference/pipelines/pp_shitu_v2/result.py +4 -4
- paddlex/inference/pipelines/rotated_object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/seal_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/seal_recognition/pipeline.py +127 -63
- paddlex/inference/pipelines/seal_recognition/result.py +4 -2
- paddlex/inference/pipelines/semantic_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +30 -9
- paddlex/inference/pipelines/small_object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/small_object_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/table_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/table_recognition/pipeline.py +61 -37
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +668 -65
- paddlex/inference/pipelines/table_recognition/result.py +12 -10
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +12 -8
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +55 -37
- paddlex/inference/pipelines/table_recognition/utils.py +1 -1
- paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +1 -1
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ts_classification/__init__.py +1 -1
- paddlex/inference/pipelines/ts_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/ts_forecasting/__init__.py +1 -1
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +16 -6
- paddlex/inference/pipelines/video_classification/__init__.py +1 -1
- paddlex/inference/pipelines/video_classification/pipeline.py +17 -6
- paddlex/inference/pipelines/video_detection/__init__.py +1 -1
- paddlex/inference/pipelines/video_detection/pipeline.py +20 -7
- paddlex/inference/serving/__init__.py +5 -1
- paddlex/inference/serving/basic_serving/__init__.py +1 -1
- paddlex/inference/serving/basic_serving/_app.py +31 -19
- paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +7 -4
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +12 -4
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +7 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py +153 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +13 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +10 -8
- paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +13 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +17 -14
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +16 -9
- paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +11 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +14 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_server.py +9 -4
- paddlex/inference/serving/infra/__init__.py +1 -1
- paddlex/inference/serving/infra/config.py +1 -1
- paddlex/inference/serving/infra/models.py +13 -6
- paddlex/inference/serving/infra/storage.py +9 -4
- paddlex/inference/serving/infra/utils.py +54 -28
- paddlex/inference/serving/schemas/__init__.py +1 -1
- paddlex/inference/serving/schemas/anomaly_detection.py +1 -1
- paddlex/inference/serving/schemas/doc_preprocessor.py +1 -1
- paddlex/inference/serving/schemas/doc_understanding.py +78 -0
- paddlex/inference/serving/schemas/face_recognition.py +1 -1
- paddlex/inference/serving/schemas/formula_recognition.py +2 -2
- paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -1
- paddlex/inference/serving/schemas/image_classification.py +1 -1
- paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -1
- paddlex/inference/serving/schemas/instance_segmentation.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +2 -3
- paddlex/inference/serving/schemas/m_3d_bev_detection.py +1 -1
- paddlex/inference/serving/schemas/multilingual_speech_recognition.py +1 -1
- paddlex/inference/serving/schemas/object_detection.py +1 -1
- paddlex/inference/serving/schemas/ocr.py +1 -1
- paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -1
- paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -1
- paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -1
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +2 -3
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +3 -3
- paddlex/inference/serving/schemas/pp_shituv2.py +1 -1
- paddlex/inference/serving/schemas/pp_structurev3.py +11 -7
- paddlex/inference/serving/schemas/rotated_object_detection.py +1 -1
- paddlex/inference/serving/schemas/seal_recognition.py +2 -2
- paddlex/inference/serving/schemas/semantic_segmentation.py +1 -1
- paddlex/inference/serving/schemas/shared/__init__.py +1 -1
- paddlex/inference/serving/schemas/shared/classification.py +1 -1
- paddlex/inference/serving/schemas/shared/image_segmentation.py +1 -1
- paddlex/inference/serving/schemas/shared/object_detection.py +1 -1
- paddlex/inference/serving/schemas/shared/ocr.py +1 -1
- paddlex/inference/serving/schemas/small_object_detection.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +3 -7
- paddlex/inference/serving/schemas/table_recognition_v2.py +6 -7
- paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -1
- paddlex/inference/serving/schemas/ts_classification.py +1 -1
- paddlex/inference/serving/schemas/ts_forecast.py +1 -1
- paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -1
- paddlex/inference/serving/schemas/video_classification.py +1 -1
- paddlex/inference/serving/schemas/video_detection.py +1 -1
- paddlex/inference/utils/__init__.py +1 -1
- paddlex/inference/utils/benchmark.py +332 -179
- paddlex/inference/utils/color_map.py +1 -1
- paddlex/inference/utils/get_pipeline_path.py +1 -1
- paddlex/inference/utils/hpi.py +258 -0
- paddlex/inference/utils/hpi_model_info_collection.json +2331 -0
- paddlex/inference/utils/io/__init__.py +11 -11
- paddlex/inference/utils/io/readers.py +31 -27
- paddlex/inference/utils/io/style.py +21 -14
- paddlex/inference/utils/io/tablepyxl.py +13 -5
- paddlex/inference/utils/io/writers.py +9 -10
- paddlex/inference/utils/mkldnn_blocklist.py +25 -0
- paddlex/inference/utils/model_paths.py +48 -0
- paddlex/inference/utils/{new_ir_blacklist.py → new_ir_blocklist.py} +1 -2
- paddlex/inference/utils/official_models.py +278 -262
- paddlex/inference/utils/pp_option.py +184 -92
- paddlex/inference/utils/trt_blocklist.py +43 -0
- paddlex/inference/utils/trt_config.py +420 -0
- paddlex/model.py +30 -12
- paddlex/modules/__init__.py +57 -80
- paddlex/modules/anomaly_detection/__init__.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/__init__.py +2 -3
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +6 -3
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +8 -4
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +7 -4
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +7 -2
- paddlex/modules/anomaly_detection/evaluator.py +3 -3
- paddlex/modules/anomaly_detection/exportor.py +1 -1
- paddlex/modules/anomaly_detection/model_list.py +1 -1
- paddlex/modules/anomaly_detection/trainer.py +3 -4
- paddlex/modules/base/__init__.py +5 -5
- paddlex/modules/base/build_model.py +1 -2
- paddlex/modules/base/dataset_checker/__init__.py +2 -2
- paddlex/modules/base/dataset_checker/dataset_checker.py +4 -4
- paddlex/modules/base/dataset_checker/utils.py +1 -3
- paddlex/modules/base/evaluator.py +13 -13
- paddlex/modules/base/exportor.py +12 -13
- paddlex/modules/base/trainer.py +21 -11
- paddlex/modules/base/utils/__init__.py +13 -0
- paddlex/modules/base/utils/cinn_setting.py +89 -0
- paddlex/modules/base/utils/coco_eval.py +94 -0
- paddlex/modules/base/utils/topk_eval.py +118 -0
- paddlex/modules/doc_vlm/__init__.py +18 -0
- paddlex/modules/doc_vlm/dataset_checker.py +29 -0
- paddlex/modules/doc_vlm/evaluator.py +29 -0
- paddlex/modules/doc_vlm/exportor.py +29 -0
- paddlex/modules/doc_vlm/model_list.py +16 -0
- paddlex/modules/doc_vlm/trainer.py +41 -0
- paddlex/modules/face_recognition/__init__.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/__init__.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +1 -1
- paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +3 -5
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/face_recognition/evaluator.py +3 -3
- paddlex/modules/face_recognition/exportor.py +1 -1
- paddlex/modules/face_recognition/model_list.py +1 -1
- paddlex/modules/face_recognition/trainer.py +1 -1
- paddlex/modules/formula_recognition/__init__.py +2 -2
- paddlex/modules/formula_recognition/dataset_checker/__init__.py +3 -3
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +2 -6
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
- paddlex/modules/formula_recognition/evaluator.py +6 -3
- paddlex/modules/formula_recognition/exportor.py +1 -1
- paddlex/modules/formula_recognition/model_list.py +4 -1
- paddlex/modules/formula_recognition/trainer.py +5 -3
- paddlex/modules/general_recognition/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +7 -9
- paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +4 -5
- paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +6 -5
- paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/general_recognition/evaluator.py +2 -2
- paddlex/modules/general_recognition/exportor.py +1 -1
- paddlex/modules/general_recognition/model_list.py +1 -1
- paddlex/modules/general_recognition/trainer.py +1 -1
- paddlex/modules/image_classification/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
- paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/image_classification/evaluator.py +3 -3
- paddlex/modules/image_classification/exportor.py +1 -1
- paddlex/modules/image_classification/model_list.py +2 -1
- paddlex/modules/image_classification/trainer.py +3 -3
- paddlex/modules/image_unwarping/__init__.py +1 -1
- paddlex/modules/image_unwarping/model_list.py +1 -1
- paddlex/modules/instance_segmentation/__init__.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/__init__.py +2 -3
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +9 -5
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +8 -5
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +8 -8
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +7 -4
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +10 -8
- paddlex/modules/instance_segmentation/evaluator.py +2 -2
- paddlex/modules/instance_segmentation/exportor.py +1 -1
- paddlex/modules/instance_segmentation/model_list.py +1 -1
- paddlex/modules/instance_segmentation/trainer.py +1 -1
- paddlex/modules/keypoint_detection/__init__.py +2 -2
- paddlex/modules/keypoint_detection/dataset_checker/__init__.py +2 -2
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +1 -1
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +8 -3
- paddlex/modules/keypoint_detection/evaluator.py +2 -2
- paddlex/modules/keypoint_detection/exportor.py +1 -1
- paddlex/modules/keypoint_detection/model_list.py +1 -1
- paddlex/modules/keypoint_detection/trainer.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/__init__.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/__init__.py +3 -3
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/analyse_dataset.py +8 -8
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/check_dataset.py +1 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/evaluator.py +3 -3
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/exportor.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/model_list.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/trainer.py +5 -7
- paddlex/modules/multilabel_classification/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +10 -7
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +1 -5
- paddlex/modules/multilabel_classification/evaluator.py +3 -3
- paddlex/modules/multilabel_classification/exportor.py +1 -1
- paddlex/modules/multilabel_classification/model_list.py +1 -1
- paddlex/modules/multilabel_classification/trainer.py +3 -3
- paddlex/modules/multilingual_speech_recognition/__init__.py +2 -2
- paddlex/modules/multilingual_speech_recognition/dataset_checker.py +3 -3
- paddlex/modules/multilingual_speech_recognition/evaluator.py +3 -3
- paddlex/modules/multilingual_speech_recognition/exportor.py +3 -3
- paddlex/modules/multilingual_speech_recognition/model_list.py +1 -1
- paddlex/modules/multilingual_speech_recognition/trainer.py +7 -5
- paddlex/modules/object_detection/__init__.py +2 -2
- paddlex/modules/object_detection/dataset_checker/__init__.py +2 -11
- paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +10 -8
- paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +17 -12
- paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +8 -4
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +9 -8
- paddlex/modules/object_detection/evaluator.py +11 -6
- paddlex/modules/object_detection/exportor.py +1 -1
- paddlex/modules/object_detection/model_list.py +3 -1
- paddlex/modules/object_detection/trainer.py +4 -5
- paddlex/modules/open_vocabulary_detection/__init__.py +2 -2
- paddlex/modules/open_vocabulary_detection/dataset_checker.py +3 -3
- paddlex/modules/open_vocabulary_detection/evaluator.py +3 -3
- paddlex/modules/open_vocabulary_detection/exportor.py +3 -3
- paddlex/modules/open_vocabulary_detection/model_list.py +2 -4
- paddlex/modules/open_vocabulary_detection/trainer.py +7 -5
- paddlex/modules/open_vocabulary_segmentation/__init__.py +2 -2
- paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/evaluator.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/exportor.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/model_list.py +1 -1
- paddlex/modules/open_vocabulary_segmentation/trainer.py +7 -5
- paddlex/modules/semantic_segmentation/__init__.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +2 -3
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +6 -3
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +7 -4
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +6 -2
- paddlex/modules/semantic_segmentation/evaluator.py +3 -3
- paddlex/modules/semantic_segmentation/exportor.py +1 -1
- paddlex/modules/semantic_segmentation/model_list.py +1 -1
- paddlex/modules/semantic_segmentation/trainer.py +3 -4
- paddlex/modules/table_recognition/__init__.py +2 -2
- paddlex/modules/table_recognition/dataset_checker/__init__.py +5 -5
- paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +3 -2
- paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +8 -7
- paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +2 -1
- paddlex/modules/table_recognition/evaluator.py +3 -3
- paddlex/modules/table_recognition/exportor.py +1 -1
- paddlex/modules/table_recognition/model_list.py +1 -1
- paddlex/modules/table_recognition/trainer.py +2 -5
- paddlex/modules/text_detection/__init__.py +2 -2
- paddlex/modules/text_detection/dataset_checker/__init__.py +4 -6
- paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +12 -9
- paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +3 -3
- paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +3 -3
- paddlex/modules/text_detection/evaluator.py +3 -3
- paddlex/modules/text_detection/exportor.py +1 -1
- paddlex/modules/text_detection/model_list.py +3 -1
- paddlex/modules/text_detection/trainer.py +2 -5
- paddlex/modules/text_recognition/__init__.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/__init__.py +4 -5
- paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
- paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +2 -5
- paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
- paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
- paddlex/modules/text_recognition/evaluator.py +3 -3
- paddlex/modules/text_recognition/exportor.py +1 -1
- paddlex/modules/text_recognition/model_list.py +3 -1
- paddlex/modules/text_recognition/trainer.py +2 -3
- paddlex/modules/ts_anomaly_detection/__init__.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +1 -9
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_anomaly_detection/evaluator.py +3 -3
- paddlex/modules/ts_anomaly_detection/exportor.py +2 -3
- paddlex/modules/ts_anomaly_detection/model_list.py +1 -1
- paddlex/modules/ts_anomaly_detection/trainer.py +8 -8
- paddlex/modules/ts_classification/__init__.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -5
- paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +5 -5
- paddlex/modules/ts_classification/evaluator.py +3 -3
- paddlex/modules/ts_classification/exportor.py +2 -3
- paddlex/modules/ts_classification/model_list.py +1 -1
- paddlex/modules/ts_classification/trainer.py +7 -7
- paddlex/modules/ts_forecast/__init__.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +1 -9
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_forecast/evaluator.py +3 -3
- paddlex/modules/ts_forecast/exportor.py +2 -3
- paddlex/modules/ts_forecast/model_list.py +1 -1
- paddlex/modules/ts_forecast/trainer.py +7 -7
- paddlex/modules/video_classification/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +9 -9
- paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +2 -3
- paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/video_classification/evaluator.py +3 -3
- paddlex/modules/video_classification/exportor.py +1 -1
- paddlex/modules/video_classification/model_list.py +1 -1
- paddlex/modules/video_classification/trainer.py +3 -3
- paddlex/modules/video_detection/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +3 -5
- paddlex/modules/video_detection/evaluator.py +3 -3
- paddlex/modules/video_detection/exportor.py +1 -1
- paddlex/modules/video_detection/model_list.py +1 -1
- paddlex/modules/video_detection/trainer.py +3 -3
- paddlex/ops/__init__.py +7 -4
- paddlex/ops/iou3d_nms/iou3d_cpu.cpp +8 -6
- paddlex/ops/iou3d_nms/iou3d_cpu.h +3 -2
- paddlex/ops/iou3d_nms/iou3d_nms.cpp +8 -6
- paddlex/ops/iou3d_nms/iou3d_nms.h +6 -4
- paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +24 -18
- paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +9 -7
- paddlex/ops/setup.py +3 -3
- paddlex/ops/voxel/voxelize_op.cc +22 -19
- paddlex/ops/voxel/voxelize_op.cu +25 -25
- paddlex/paddlex_cli.py +104 -87
- paddlex/repo_apis/Paddle3D_api/__init__.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +6 -6
- paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +2 -2
- paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +1 -1
- paddlex/repo_apis/Paddle3D_api/pp3d_config.py +3 -2
- paddlex/repo_apis/PaddleClas_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/__init__.py +3 -3
- paddlex/repo_apis/PaddleClas_api/cls/config.py +5 -4
- paddlex/repo_apis/PaddleClas_api/cls/model.py +4 -4
- paddlex/repo_apis/PaddleClas_api/cls/register.py +12 -3
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +2 -3
- paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +1 -4
- paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +1 -6
- paddlex/repo_apis/PaddleDetection_api/__init__.py +2 -2
- paddlex/repo_apis/PaddleDetection_api/config_helper.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +2 -2
- paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +4 -4
- paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +5 -4
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +6 -7
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +26 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +32 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +2 -3
- paddlex/repo_apis/PaddleNLP_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/__init__.py +4 -3
- paddlex/repo_apis/PaddleOCR_api/config_utils.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +7 -6
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +9 -13
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +29 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +4 -4
- paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/config.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +4 -4
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +20 -3
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +7 -6
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +9 -13
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +20 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +2 -3
- paddlex/repo_apis/PaddleSeg_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +2 -2
- paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/config.py +3 -6
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +6 -6
- paddlex/repo_apis/PaddleSeg_api/seg/register.py +2 -3
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +2 -3
- paddlex/repo_apis/PaddleTS_api/__init__.py +4 -3
- paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +5 -6
- paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_base/config.py +2 -4
- paddlex/repo_apis/PaddleTS_api/ts_base/model.py +4 -4
- paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +4 -5
- paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +6 -7
- paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/config_utils.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +5 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +5 -4
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +5 -5
- paddlex/repo_apis/PaddleVideo_api/video_det/register.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +2 -3
- paddlex/repo_apis/__init__.py +1 -1
- paddlex/repo_apis/base/__init__.py +4 -5
- paddlex/repo_apis/base/config.py +3 -4
- paddlex/repo_apis/base/model.py +11 -19
- paddlex/repo_apis/base/register.py +1 -1
- paddlex/repo_apis/base/runner.py +11 -12
- paddlex/repo_apis/base/utils/__init__.py +1 -1
- paddlex/repo_apis/base/utils/arg.py +1 -1
- paddlex/repo_apis/base/utils/subprocess.py +1 -1
- paddlex/repo_manager/__init__.py +2 -9
- paddlex/repo_manager/core.py +12 -30
- paddlex/repo_manager/meta.py +41 -31
- paddlex/repo_manager/repo.py +171 -161
- paddlex/repo_manager/utils.py +13 -224
- paddlex/utils/__init__.py +1 -1
- paddlex/utils/cache.py +8 -10
- paddlex/utils/config.py +6 -5
- paddlex/utils/{custom_device_whitelist.py → custom_device_list.py} +53 -199
- paddlex/utils/deps.py +249 -0
- paddlex/utils/device.py +87 -36
- paddlex/utils/download.py +4 -4
- paddlex/utils/env.py +37 -7
- paddlex/utils/errors/__init__.py +1 -1
- paddlex/utils/errors/dataset_checker.py +1 -1
- paddlex/utils/errors/others.py +2 -16
- paddlex/utils/file_interface.py +4 -5
- paddlex/utils/flags.py +17 -12
- paddlex/utils/fonts/__init__.py +36 -5
- paddlex/utils/func_register.py +1 -1
- paddlex/utils/install.py +87 -0
- paddlex/utils/interactive_get_pipeline.py +3 -3
- paddlex/utils/lazy_loader.py +3 -3
- paddlex/utils/logging.py +10 -1
- paddlex/utils/misc.py +6 -6
- paddlex/utils/pipeline_arguments.py +15 -7
- paddlex/utils/result_saver.py +4 -5
- paddlex/utils/subclass_register.py +2 -4
- paddlex/version.py +2 -1
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/METADATA +237 -102
- paddlex-3.0.1.dist-info/RECORD +1095 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
- paddlex/inference/models/base/predictor/basic_predictor.py +0 -139
- paddlex/paddle2onnx_requirements.txt +0 -1
- paddlex/repo_manager/requirements.txt +0 -21
- paddlex/serving_requirements.txt +0 -9
- paddlex-3.0.0rc0.dist-info/RECORD +0 -1015
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info/licenses}/LICENSE +0 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -12,42 +12,50 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import os, sys
-from typing import Any, Dict, Optional, Union, List, Tuple
-import numpy as np
 import math
-import
-from
+import re
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+
+from ....utils import logging
+from ....utils.deps import (
+    function_requires_deps,
+    is_dep_available,
+    pipeline_requires_extra,
+)
+from ...common.batch_sampler import ImageBatchSampler
+from ...common.reader import ReadImage
+from ...models.object_detection.result import DetResult
+from ...utils.hpi import HPIConfig
+from ...utils.pp_option import PaddlePredictorOption
+from .._parallel import AutoParallelImageSimpleInferencePipeline
 from ..base import BasePipeline
 from ..components import CropByBoxes
-from .
-from .
+from ..doc_preprocessor.result import DocPreprocessorResult
+from ..layout_parsing.utils import get_sub_regions_ocr_res
+from ..ocr.result import OCRResult
+from .result import SingleTableRecognitionResult, TableRecognitionResult
 from .table_recognition_post_processing import (
     get_table_recognition_res as get_table_recognition_res_e2e,
 )
-from .
-from
-from ...utils.pp_option import PaddlePredictorOption
-from ...common.reader import ReadImage
-from ...common.batch_sampler import ImageBatchSampler
-from ..ocr.result import OCRResult
-from ..doc_preprocessor.result import DocPreprocessorResult
+from .table_recognition_post_processing_v2 import get_table_recognition_res
+from .utils import get_neighbor_boxes_idx

-
+if is_dep_available("scikit-learn"):
+    from sklearn.cluster import KMeans


-class
+class _TableRecognitionPipelineV2(BasePipeline):
     """Table Recognition Pipeline"""

-    entities = ["table_recognition_v2"]
-
     def __init__(
         self,
         config: Dict,
         device: str = None,
         pp_option: PaddlePredictorOption = None,
         use_hpip: bool = False,
-
+        hpi_config: Optional[Union[Dict[str, Any], HPIConfig]] = None,
     ) -> None:
         """Initializes the layout parsing pipeline.

@@ -55,12 +63,15 @@ class TableRecognitionPipelineV2(BasePipeline):
             config (Dict): Configuration dictionary containing various settings.
             device (str, optional): Device to run the predictions on. Defaults to None.
             pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
-            use_hpip (bool, optional): Whether to use high-performance
-
+            use_hpip (bool, optional): Whether to use the high-performance
+                inference plugin (HPIP) by default. Defaults to False.
+            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
+                The default high-performance inference configuration dictionary.
+                Defaults to None.
         """

         super().__init__(
-            device=device, pp_option=pp_option, use_hpip=use_hpip,
+            device=device, pp_option=pp_option, use_hpip=use_hpip, hpi_config=hpi_config
         )

         self.use_doc_preprocessor = config.get("use_doc_preprocessor", True)
@@ -122,6 +133,7 @@ class TableRecognitionPipelineV2(BasePipeline):
             )

         self.use_ocr_model = config.get("use_ocr_model", True)
+        self.general_ocr_pipeline = None
         if self.use_ocr_model:
             general_ocr_config = config.get("SubPipelines", {}).get(
                 "GeneralOCR",
@@ -130,12 +142,15 @@ class TableRecognitionPipelineV2(BasePipeline):
             self.general_ocr_pipeline = self.create_pipeline(general_ocr_config)
         else:
             self.general_ocr_config_bak = config.get("SubPipelines", {}).get(
-                "GeneralOCR",
-                None
+                "GeneralOCR", None
             )

-        self.
+        self.table_orientation_classify_model = None
+        self.table_orientation_classify_config = config.get("SubModules", {}).get(
+            "TableOrientationClassify", None
+        )

+        self._crop_by_boxes = CropByBoxes()
         self.batch_sampler = ImageBatchSampler(batch_size=1)
         self.img_reader = ReadImage(format="BGR")

@@ -414,12 +429,13 @@ class TableRecognitionPipelineV2(BasePipeline):
             intersection_area = (x_right - x_left) * (y_bottom - y_top)
             # Calculate the area of both rectangles
             box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
-
+            (box2[2] - box2[0]) * (box2[3] - box2[1])
             # Calculate the IoU
             iou = intersection_area / float(box1_area)
             return iou

         # Function to combine rectangles into N rectangles
+        @function_requires_deps("scikit-learn")
         def combine_rectangles(rectangles, N):
             """
             Combine rectangles into N rectangles based on geometric proximity.
@@ -528,7 +544,177 @@ class TableRecognitionPipelineV2(BasePipeline):
         final_results = combine_rectangles(ocr_det_results, html_pred_boxes_nums)
         return final_results

-    def split_ocr_bboxes_by_table_cells(
+    def split_ocr_bboxes_by_table_cells(
+        self, cells_det_results, overall_ocr_res, ori_img, k=2
+    ):
+        """
+        Split OCR bounding boxes based on table cell boundaries when they span multiple cells horizontally.
+
+        Args:
+            cells_det_results (list): List of cell bounding boxes in format [x1, y1, x2, y2]
+            overall_ocr_res (dict): Dictionary containing OCR results with keys:
+                - 'rec_boxes': OCR bounding boxes (will be converted to list)
+                - 'rec_texts': OCR recognized texts
+            ori_img (np.array): Original input image array
+            k (int): Threshold for determining when to split (minimum number of cells spanned)
+
+        Returns:
+            dict: Modified overall_ocr_res with split boxes and texts
+        """
+
+        def calculate_iou(box1, box2):
+            """
+            Calculate Intersection over Union (IoU) between two bounding boxes.
+
+            Args:
+                box1 (list): [x1, y1, x2, y2]
+                box2 (list): [x1, y1, x2, y2]
+
+            Returns:
+                float: IoU value
+            """
+            # Determine intersection coordinates
+            x_left = max(box1[0], box2[0])
+            y_top = max(box1[1], box2[1])
+            x_right = min(box1[2], box2[2])
+            y_bottom = min(box1[3], box2[3])
+            if x_right < x_left or y_bottom < y_top:
+                return 0.0
+            # Calculate areas
+            intersection_area = (x_right - x_left) * (y_bottom - y_top)
+            box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
+            box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
+            # return intersection_area / float(box1_area + box2_area - intersection_area)
+            return intersection_area / box2_area
+
+        def get_overlapping_cells(ocr_box, cells):
+            """
+            Find cells that overlap significantly with the OCR box (IoU > 0.5).
+
+            Args:
+                ocr_box (list): OCR bounding box [x1, y1, x2, y2]
+                cells (list): List of cell bounding boxes
+
+            Returns:
+                list: Indices of overlapping cells, sorted by x-coordinate
+            """
+            overlapping = []
+            for idx, cell in enumerate(cells):
+                if calculate_iou(ocr_box, cell) > 0.5:
+                    overlapping.append(idx)
+            # Sort overlapping cells by their x-coordinate (left to right)
+            overlapping.sort(key=lambda i: cells[i][0])
+            return overlapping
+
+        def split_box_by_cells(ocr_box, cell_indices, cells):
+            """
+            Split OCR box vertically at cell boundaries.
+
+            Args:
+                ocr_box (list): Original OCR box [x1, y1, x2, y2]
+                cell_indices (list): Indices of cells to split by
+                cells (list): All cell bounding boxes
+
+            Returns:
+                list: List of split boxes
+            """
+            if not cell_indices:
+                return [ocr_box]
+            split_boxes = []
+            cells_to_split = [cells[i] for i in cell_indices]
+            if ocr_box[0] < cells_to_split[0][0]:
+                split_boxes.append(
+                    [ocr_box[0], ocr_box[1], cells_to_split[0][0], ocr_box[3]]
+                )
+            for i in range(len(cells_to_split)):
+                current_cell = cells_to_split[i]
+                split_boxes.append(
+                    [
+                        max(ocr_box[0], current_cell[0]),
+                        ocr_box[1],
+                        min(ocr_box[2], current_cell[2]),
+                        ocr_box[3],
+                    ]
+                )
+                if i < len(cells_to_split) - 1:
+                    next_cell = cells_to_split[i + 1]
+                    if current_cell[2] < next_cell[0]:
+                        split_boxes.append(
+                            [current_cell[2], ocr_box[1], next_cell[0], ocr_box[3]]
+                        )
+            last_cell = cells_to_split[-1]
+            if last_cell[2] < ocr_box[2]:
+                split_boxes.append([last_cell[2], ocr_box[1], ocr_box[2], ocr_box[3]])
+            unique_boxes = []
+            seen = set()
+            for box in split_boxes:
+                box_tuple = tuple(box)
+                if box_tuple not in seen:
+                    seen.add(box_tuple)
+                    unique_boxes.append(box)
+
+            return unique_boxes
+
+        # Convert OCR boxes to list if needed
+        if hasattr(overall_ocr_res["rec_boxes"], "tolist"):
+            ocr_det_results = overall_ocr_res["rec_boxes"].tolist()
+        else:
+            ocr_det_results = overall_ocr_res["rec_boxes"]
+        ocr_texts = overall_ocr_res["rec_texts"]
+
+        # Make copies to modify
+        new_boxes = []
+        new_texts = []
+
+        # Process each OCR box
+        i = 0
+        while i < len(ocr_det_results):
+            ocr_box = ocr_det_results[i]
+            text = ocr_texts[i]
+            # Find cells that significantly overlap with this OCR box
+            overlapping_cells = get_overlapping_cells(ocr_box, cells_det_results)
+            # Check if we need to split (spans >= k cells)
+            if len(overlapping_cells) >= k:
+                # Split the box at cell boundaries
+                split_boxes = split_box_by_cells(
+                    ocr_box, overlapping_cells, cells_det_results
+                )
+                # Process each split box
+                split_texts = []
+                for box in split_boxes:
+                    x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
+                    if y2 - y1 > 1 and x2 - x1 > 1:
+                        ocr_result = next(
+                            self.general_ocr_pipeline.text_rec_model(
+                                ori_img[y1:y2, x1:x2, :]
+                            )
+                        )
+                        # Extract the recognized text from the OCR result
+                        if "rec_text" in ocr_result:
+                            result = ocr_result[
+                                "rec_text"
+                            ]  # Assumes "rec_texts" contains a single string
+                        else:
+                            result = ""
+                    else:
+                        result = ""
+                    split_texts.append(result)
+                # Add split boxes and texts to results
+                new_boxes.extend(split_boxes)
+                new_texts.extend(split_texts)
+            else:
+                # Keep original box and text
+                new_boxes.append(ocr_box)
+                new_texts.append(text)
+            i += 1
+
+        # Update the results dictionary
+        overall_ocr_res["rec_boxes"] = new_boxes
+        overall_ocr_res["rec_texts"] = new_texts
+
+        return overall_ocr_res
+
+    def gen_ocr_with_table_cells(self, ori_img, cells_bboxes):
         """
         Splits OCR bounding boxes by table cells and retrieves text.

@@ -549,20 +735,228 @@ class TableRecognitionPipelineV2(BasePipeline):
             # Extract and round up the coordinates of the bounding box.
             x1, y1, x2, y2 = [math.ceil(k) for k in cells_bboxes[i]]
             # Perform OCR on the defined region of the image and get the recognized text.
-
-
-
+            if y2 - y1 > 1 and x2 - x1 > 1:
+                rec_te = next(self.general_ocr_pipeline(ori_img[y1:y2, x1:x2, :]))
+                # Concatenate the texts and append them to the texts_list.
+                texts_list.append("".join(rec_te["rec_texts"]))
         # Return the list of recognized texts from each cell.
         return texts_list

+    def map_cells_to_original_image(
+        self, detections, table_angle, img_width, img_height
+    ):
+        """
+        Map bounding boxes from the rotated image back to the original image.
+
+        Parameters:
+        - detections: list of numpy arrays, each containing bounding box coordinates [x1, y1, x2, y2]
+        - table_angle: rotation angle in degrees (90, 180, or 270)
+        - width_orig: width of the original image (img1)
+        - height_orig: height of the original image (img1)
+
+        Returns:
+        - mapped_detections: list of numpy arrays with mapped bounding box coordinates
+        """
+
+        mapped_detections = []
+        for i in range(len(detections)):
+            tbx1, tby1, tbx2, tby2 = (
+                detections[i][0],
+                detections[i][1],
+                detections[i][2],
+                detections[i][3],
+            )
+            if table_angle == "270":
+                new_x1, new_y1 = tby1, img_width - tbx2
+                new_x2, new_y2 = tby2, img_width - tbx1
+            elif table_angle == "180":
+                new_x1, new_y1 = img_width - tbx2, img_height - tby2
+                new_x2, new_y2 = img_width - tbx1, img_height - tby1
+            elif table_angle == "90":
+                new_x1, new_y1 = img_height - tby2, tbx1
+                new_x2, new_y2 = img_height - tby1, tbx2
+            new_box = np.array([new_x1, new_y1, new_x2, new_y2])
+            mapped_detections.append(new_box)
+        return mapped_detections
+
+    def split_string_by_keywords(self, html_string):
+        """
+        Split HTML string by keywords.
+
+        Args:
+            html_string (str): The HTML string.
+        Returns:
+            split_html (list): The list of html keywords.
+        """
+
+        keywords = [
+            "<thead>",
+            "</thead>",
+            "<tbody>",
+            "</tbody>",
+            "<tr>",
+            "</tr>",
+            "<td>",
+            "<td",
+            ">",
+            "</td>",
+            'colspan="2"',
+            'colspan="3"',
+            'colspan="4"',
+            'colspan="5"',
+            'colspan="6"',
+            'colspan="7"',
+            'colspan="8"',
+            'colspan="9"',
+            'colspan="10"',
+            'colspan="11"',
+            'colspan="12"',
+            'colspan="13"',
+            'colspan="14"',
+            'colspan="15"',
+            'colspan="16"',
+            'colspan="17"',
+            'colspan="18"',
+            'colspan="19"',
+            'colspan="20"',
+            'rowspan="2"',
+            'rowspan="3"',
+            'rowspan="4"',
+            'rowspan="5"',
+            'rowspan="6"',
+            'rowspan="7"',
+            'rowspan="8"',
+            'rowspan="9"',
+            'rowspan="10"',
+            'rowspan="11"',
+            'rowspan="12"',
+            'rowspan="13"',
+            'rowspan="14"',
+            'rowspan="15"',
+            'rowspan="16"',
+            'rowspan="17"',
+            'rowspan="18"',
+            'rowspan="19"',
+            'rowspan="20"',
+        ]
+        regex_pattern = "|".join(re.escape(keyword) for keyword in keywords)
+        split_result = re.split(f"({regex_pattern})", html_string)
+        split_html = [part for part in split_result if part]
+        return split_html
+
+    def cluster_positions(self, positions, tolerance):
+        if not positions:
+            return []
+        positions = sorted(set(positions))
+        clustered = []
+        current_cluster = [positions[0]]
+        for pos in positions[1:]:
+            if abs(pos - current_cluster[-1]) <= tolerance:
+                current_cluster.append(pos)
+            else:
+                clustered.append(sum(current_cluster) / len(current_cluster))
+                current_cluster = [pos]
+        clustered.append(sum(current_cluster) / len(current_cluster))
+        return clustered
+
+    def trans_cells_det_results_to_html(self, cells_det_results):
+        """
+        Trans table cells bboxes to HTML.
+
+        Args:
+            cells_det_results (list): The table cells detection results.
+        Returns:
+            html (list): The list of html keywords.
+        """
+
+        tolerance = 5
+        x_coords = [x for cell in cells_det_results for x in (cell[0], cell[2])]
+        y_coords = [y for cell in cells_det_results for y in (cell[1], cell[3])]
+        x_positions = self.cluster_positions(x_coords, tolerance)
+        y_positions = self.cluster_positions(y_coords, tolerance)
+        x_position_to_index = {x: i for i, x in enumerate(x_positions)}
+        y_position_to_index = {y: i for i, y in enumerate(y_positions)}
+        num_rows = len(y_positions) - 1
+        num_cols = len(x_positions) - 1
+        grid = [[None for _ in range(num_cols)] for _ in range(num_rows)]
+        cells_info = []
+        cell_index = 0
+        cell_map = {}
+        for index, cell in enumerate(cells_det_results):
+            x1, y1, x2, y2 = cell
+            x1_idx = min(
+                range(len(x_positions)), key=lambda i: abs(x_positions[i] - x1)
+            )
+            x2_idx = min(
+                range(len(x_positions)), key=lambda i: abs(x_positions[i] - x2)
+            )
+            y1_idx = min(
+                range(len(y_positions)), key=lambda i: abs(y_positions[i] - y1)
+            )
+            y2_idx = min(
+                range(len(y_positions)), key=lambda i: abs(y_positions[i] - y2)
+            )
+            col_start = min(x1_idx, x2_idx)
+            col_end = max(x1_idx, x2_idx)
+            row_start = min(y1_idx, y2_idx)
+            row_end = max(y1_idx, y2_idx)
+            rowspan = row_end - row_start
+            colspan = col_end - col_start
+            if rowspan == 0:
+                rowspan = 1
+            if colspan == 0:
+                colspan = 1
+            cells_info.append(
+                {
+                    "row_start": row_start,
+                    "col_start": col_start,
+                    "rowspan": rowspan,
+                    "colspan": colspan,
+                    "content": "",
+                }
+            )
+            for r in range(row_start, row_start + rowspan):
+                for c in range(col_start, col_start + colspan):
+                    key = (r, c)
+                    if key in cell_map:
+                        continue
+                    else:
+                        cell_map[key] = index
+        html = "<table><tbody>"
+        for r in range(num_rows):
+            html += "<tr>"
+            c = 0
+            while c < num_cols:
+                key = (r, c)
+                if key in cell_map:
+                    cell_index = cell_map[key]
+                    cell_info = cells_info[cell_index]
+                    if cell_info["row_start"] == r and cell_info["col_start"] == c:
+                        rowspan = cell_info["rowspan"]
+                        colspan = cell_info["colspan"]
+                        rowspan_attr = f' rowspan="{rowspan}"' if rowspan > 1 else ""
+                        colspan_attr = f' colspan="{colspan}"' if colspan > 1 else ""
+                        content = cell_info["content"]
+                        html += f"<td{rowspan_attr}{colspan_attr}>{content}</td>"
+                    c += cell_info["colspan"]
+                else:
+                    html += "<td></td>"
+                    c += 1
+            html += "</tr>"
+        html += "</tbody></table>"
+        html = self.split_string_by_keywords(html)
+        return html
+
     def predict_single_table_recognition_res(
         self,
         image_array: np.ndarray,
         overall_ocr_res: OCRResult,
         table_box: list,
-        use_table_cells_ocr_results: bool = False,
         use_e2e_wired_table_rec_model: bool = False,
         use_e2e_wireless_table_rec_model: bool = False,
+        use_wired_table_cells_trans_to_html: bool = False,
+        use_wireless_table_cells_trans_to_html: bool = False,
+        use_ocr_results_with_table_cells: bool = True,
         flag_find_nei_text: bool = True,
     ) -> SingleTableRecognitionResult:
         """
@@ -573,9 +967,11 @@ class TableRecognitionPipelineV2(BasePipeline):
             overall_ocr_res (OCRResult): Overall OCR result obtained after running the OCR pipeline.
                 The overall OCR results containing text recognition information.
             table_box (list): The table box coordinates.
-            use_table_cells_ocr_results (bool): whether to use OCR results with cells.
             use_e2e_wired_table_rec_model (bool): Whether to use end-to-end wired table recognition model.
             use_e2e_wireless_table_rec_model (bool): Whether to use end-to-end wireless table recognition model.
+            use_wired_table_cells_trans_to_html (bool): Whether to use wired table cells trans to HTML.
+            use_wireless_table_cells_trans_to_html (bool): Whether to use wireless table cells trans to HTML.
+            use_ocr_results_with_table_cells (bool): Whether to use OCR results processed by table cells.
             flag_find_nei_text (bool): Whether to find neighboring text.
         Returns:
             SingleTableRecognitionResult: single table recognition result.
@@ -584,62 +980,110 @@ class TableRecognitionPipelineV2(BasePipeline):
         table_cls_pred = next(self.table_cls_model(image_array))
         table_cls_result = self.extract_results(table_cls_pred, "cls")
         use_e2e_model = False
+        cells_trans_to_html = False

         if table_cls_result == "wired_table":
-
+            if use_wired_table_cells_trans_to_html == True:
+                cells_trans_to_html = True
+            else:
+                table_structure_pred = next(self.wired_table_rec_model(image_array))
             if use_e2e_wired_table_rec_model == True:
                 use_e2e_model = True
+                if cells_trans_to_html == True:
+                    table_structure_pred = next(self.wired_table_rec_model(image_array))
             else:
                 table_cells_pred = next(
                     self.wired_table_cells_detection_model(image_array, threshold=0.3)
                 )  # Setting the threshold to 0.3 can improve the accuracy of table cells detection.
                 # If you really want more or fewer table cells detection boxes, the threshold can be adjusted.
         elif table_cls_result == "wireless_table":
-
+            if use_wireless_table_cells_trans_to_html == True:
+                cells_trans_to_html = True
+            else:
+                table_structure_pred = next(self.wireless_table_rec_model(image_array))
             if use_e2e_wireless_table_rec_model == True:
                 use_e2e_model = True
+                if cells_trans_to_html == True:
+                    table_structure_pred = next(
+                        self.wireless_table_rec_model(image_array)
+                    )
             else:
                 table_cells_pred = next(
-                    self.wireless_table_cells_detection_model(
+                    self.wireless_table_cells_detection_model(
+                        image_array, threshold=0.3
+                    )
                 )  # Setting the threshold to 0.3 can improve the accuracy of table cells detection.
                 # If you really want more or fewer table cells detection boxes, the threshold can be adjusted.

         if use_e2e_model == False:
-
-
-
-
-
-                table_cells_result,
-                table_cells_score,
-                ocr_det_boxes,
-                len(table_structure_pred["bbox"]),
+            table_cells_result, table_cells_score = self.extract_results(
+                table_cells_pred, "det"
+            )
+            table_cells_result, table_cells_score = self.cells_det_results_nms(
+                table_cells_result, table_cells_score
             )
-            if
-
+            if cells_trans_to_html == True:
+                table_structure_result = self.trans_cells_det_results_to_html(
+                    table_cells_result
+                )
             else:
+                table_structure_result = self.extract_results(
+                    table_structure_pred, "table_stru"
+                )
+                ocr_det_boxes = self.get_region_ocr_det_boxes(
+                    overall_ocr_res["rec_boxes"].tolist(), table_box
+                )
+                table_cells_result = self.cells_det_results_reprocessing(
+                    table_cells_result,
+                    table_cells_score,
+                    ocr_det_boxes,
+                    len(table_structure_pred["bbox"]),
+                )
+            if use_ocr_results_with_table_cells == True:
+                if self.cells_split_ocr == True:
+                    table_box_copy = np.array([table_box])
+                    table_ocr_pred = get_sub_regions_ocr_res(
+                        overall_ocr_res, table_box_copy
+                    )
+                    table_ocr_pred = self.split_ocr_bboxes_by_table_cells(
+                        table_cells_result, table_ocr_pred, image_array
+                    )
+                    cells_texts_list = []
+                else:
+                    cells_texts_list = self.gen_ocr_with_table_cells(
+                        image_array, table_cells_result
+                    )
+                    table_ocr_pred = {}
+            else:
+                table_ocr_pred = {}
                 cells_texts_list = []
             single_table_recognition_res = get_table_recognition_res(
                 table_box,
                 table_structure_result,
                 table_cells_result,
                 overall_ocr_res,
+                table_ocr_pred,
                 cells_texts_list,
-
+                use_ocr_results_with_table_cells,
+                self.cells_split_ocr,
             )
         else:
-
-
-
-
-
-
+            cells_texts_list = []
+            use_ocr_results_with_table_cells = False
+            table_cells_result_e2e = table_structure_pred["bbox"]
+            table_cells_result_e2e = [
+                [rect[0], rect[1], rect[4], rect[5]] for rect in table_cells_result_e2e
+            ]
+            if cells_trans_to_html == True:
+                table_structure_pred["structure"] = (
+                    self.trans_cells_det_results_to_html(table_cells_result_e2e)
+                )
             single_table_recognition_res = get_table_recognition_res_e2e(
                 table_box,
                 table_structure_pred,
                 overall_ocr_res,
                 cells_texts_list,
-
+                use_ocr_results_with_table_cells,
             )

         neighbor_text = ""
@@ -668,9 +1112,12 @@ class TableRecognitionPipelineV2(BasePipeline):
         text_det_box_thresh: Optional[float] = None,
         text_det_unclip_ratio: Optional[float] = None,
         text_rec_score_thresh: Optional[float] = None,
-        use_table_cells_ocr_results: bool = False,
         use_e2e_wired_table_rec_model: bool = False,
         use_e2e_wireless_table_rec_model: bool = False,
+        use_wired_table_cells_trans_to_html: bool = False,
+        use_wireless_table_cells_trans_to_html: bool = False,
+        use_table_orientation_classify: bool = True,
+        use_ocr_results_with_table_cells: bool = True,
         **kwargs,
     ) -> TableRecognitionResult:
         """
@@ -685,16 +1132,28 @@ class TableRecognitionPipelineV2(BasePipeline):
                 It will be used if it is not None and use_ocr_model is False.
             layout_det_res (DetResult): The layout detection result.
                 It will be used if it is not None and use_layout_detection is False.
-            use_table_cells_ocr_results (bool): whether to use OCR results with cells.
             use_e2e_wired_table_rec_model (bool): Whether to use end-to-end wired table recognition model.
             use_e2e_wireless_table_rec_model (bool): Whether to use end-to-end wireless table recognition model.
-
+            use_wired_table_cells_trans_to_html (bool): Whether to use wired table cells trans to HTML.
+            use_wireless_table_cells_trans_to_html (bool): Whether to use wireless table cells trans to HTML.
+            use_table_orientation_classify (bool): Whether to use table orientation classification.
+            use_ocr_results_with_table_cells (bool): Whether to use OCR results processed by table cells.
             **kwargs: Additional keyword arguments.

         Returns:
             TableRecognitionResult: The predicted table recognition result.
         """

+        self.cells_split_ocr = True
+
+        if use_table_orientation_classify == True and (
+            self.table_orientation_classify_model is None
+        ):
+            assert self.table_orientation_classify_config != None
+            self.table_orientation_classify_model = self.create_model(
+                self.table_orientation_classify_config
+            )
+
         model_settings = self.get_model_settings(
             use_doc_orientation_classify,
             use_doc_unwarping,
@@ -735,48 +1194,179 @@ class TableRecognitionPipelineV2(BasePipeline):
                     text_rec_score_thresh=text_rec_score_thresh,
                 )
             )
-        elif
+        elif self.general_ocr_pipeline is None and (
+            (
+                use_ocr_results_with_table_cells == True
+                and self.cells_split_ocr == False
+            )
+            or use_table_orientation_classify == True
+        ):
             assert self.general_ocr_config_bak != None
-            self.general_ocr_pipeline = self.create_pipeline(
+            self.general_ocr_pipeline = self.create_pipeline(
+                self.general_ocr_config_bak
+            )
+
+        if use_table_orientation_classify == False:
+            table_angle = "0"

         table_res_list = []
         table_region_id = 1
+
         if not model_settings["use_layout_detection"] and layout_det_res is None:
-            layout_det_res = {}
             img_height, img_width = doc_preprocessor_image.shape[:2]
             table_box = [0, 0, img_width - 1, img_height - 1]
+            if use_table_orientation_classify == True:
+                table_angle = next(
+                    self.table_orientation_classify_model(doc_preprocessor_image)
+                )["label_names"][0]
+                if table_angle == "90":
+                    doc_preprocessor_image = np.rot90(doc_preprocessor_image, k=1)
+                elif table_angle == "180":
+                    doc_preprocessor_image = np.rot90(doc_preprocessor_image, k=2)
+                elif table_angle == "270":
+                    doc_preprocessor_image = np.rot90(doc_preprocessor_image, k=3)
+                if table_angle in ["90", "180", "270"]:
+                    overall_ocr_res = next(
+                        self.general_ocr_pipeline(
+                            doc_preprocessor_image,
+                            text_det_limit_side_len=text_det_limit_side_len,
+                            text_det_limit_type=text_det_limit_type,
+                            text_det_thresh=text_det_thresh,
+                            text_det_box_thresh=text_det_box_thresh,
+                            text_det_unclip_ratio=text_det_unclip_ratio,
+                            text_rec_score_thresh=text_rec_score_thresh,
+                        )
+                    )
+                    tbx1, tby1, tbx2, tby2 = (
+                        table_box[0],
+                        table_box[1],
+                        table_box[2],
+                        table_box[3],
+                    )
+                    if table_angle == "90":
+                        new_x1, new_y1 = tby1, img_width - tbx2
+                        new_x2, new_y2 = tby2, img_width - tbx1
+                    elif table_angle == "180":
+                        new_x1, new_y1 = img_width - tbx2, img_height - tby2
+                        new_x2, new_y2 = img_width - tbx1, img_height - tby1
+                    elif table_angle == "270":
+                        new_x1, new_y1 = img_height - tby2, tbx1
+                        new_x2, new_y2 = img_height - tby1, tbx2
+                    table_box = [new_x1, new_y1, new_x2, new_y2]
+            layout_det_res = {}
             single_table_rec_res = self.predict_single_table_recognition_res(
                 doc_preprocessor_image,
                 overall_ocr_res,
                 table_box,
-                use_table_cells_ocr_results,
                 use_e2e_wired_table_rec_model,
                 use_e2e_wireless_table_rec_model,
+                use_wired_table_cells_trans_to_html,
+                use_wireless_table_cells_trans_to_html,
+                use_ocr_results_with_table_cells,
                 flag_find_nei_text=False,
             )
             single_table_rec_res["table_region_id"] = table_region_id
+            if use_table_orientation_classify == True and table_angle != "0":
+                img_height, img_width = doc_preprocessor_image.shape[:2]
+                single_table_rec_res["cell_box_list"] = (
+                    self.map_cells_to_original_image(
+                        single_table_rec_res["cell_box_list"],
+                        table_angle,
+                        img_width,
+                        img_height,
+                    )
+                )
             table_res_list.append(single_table_rec_res)
             table_region_id += 1
         else:
             if model_settings["use_layout_detection"]:
                 layout_det_res = next(self.layout_det_model(doc_preprocessor_image))
-
+            img_height, img_width = doc_preprocessor_image.shape[:2]
             for box_info in layout_det_res["boxes"]:
                 if box_info["label"].lower() in ["table"]:
-                    crop_img_info = self._crop_by_boxes(
+                    crop_img_info = self._crop_by_boxes(
+                        doc_preprocessor_image, [box_info]
+                    )
                     crop_img_info = crop_img_info[0]
                     table_box = crop_img_info["box"]
+                    if use_table_orientation_classify == True:
+                        doc_preprocessor_image_copy = doc_preprocessor_image.copy()
+                        table_angle = next(
+                            self.table_orientation_classify_model(
+                                crop_img_info["img"]
+                            )
+                        )["label_names"][0]
+                        if table_angle == "90":
+                            crop_img_info["img"] = np.rot90(crop_img_info["img"], k=1)
+                            doc_preprocessor_image_copy = np.rot90(
+                                doc_preprocessor_image_copy, k=1
+                            )
+                        elif table_angle == "180":
+                            crop_img_info["img"] = np.rot90(crop_img_info["img"], k=2)
+                            doc_preprocessor_image_copy = np.rot90(
+                                doc_preprocessor_image_copy, k=2
+                            )
+                        elif table_angle == "270":
+                            crop_img_info["img"] = np.rot90(crop_img_info["img"], k=3)
+                            doc_preprocessor_image_copy = np.rot90(
+                                doc_preprocessor_image_copy, k=3
+                            )
+                        if table_angle in ["90", "180", "270"]:
+                            overall_ocr_res = next(
+                                self.general_ocr_pipeline(
+                                    doc_preprocessor_image_copy,
+                                    text_det_limit_side_len=text_det_limit_side_len,
+                                    text_det_limit_type=text_det_limit_type,
+                                    text_det_thresh=text_det_thresh,
+                                    text_det_box_thresh=text_det_box_thresh,
+                                    text_det_unclip_ratio=text_det_unclip_ratio,
+                                    text_rec_score_thresh=text_rec_score_thresh,
+                                )
+                            )
+                            tbx1, tby1, tbx2, tby2 = (
+                                table_box[0],
+                                table_box[1],
+                                table_box[2],
+                                table_box[3],
+                            )
+                            if table_angle == "90":
+                                new_x1, new_y1 = tby1, img_width - tbx2
+                                new_x2, new_y2 = tby2, img_width - tbx1
+                            elif table_angle == "180":
+                                new_x1, new_y1 = img_width - tbx2, img_height - tby2
+                                new_x2, new_y2 = img_width - tbx1, img_height - tby1
+                            elif table_angle == "270":
+                                new_x1, new_y1 = img_height - tby2, tbx1
+                                new_x2, new_y2 = img_height - tby1, tbx2
+                            table_box = [new_x1, new_y1, new_x2, new_y2]
                     single_table_rec_res = (
                         self.predict_single_table_recognition_res(
                             crop_img_info["img"],
                             overall_ocr_res,
                             table_box,
-                            use_table_cells_ocr_results,
                             use_e2e_wired_table_rec_model,
                             use_e2e_wireless_table_rec_model,
+                            use_wired_table_cells_trans_to_html,
+                            use_wireless_table_cells_trans_to_html,
+                            use_ocr_results_with_table_cells,
                         )
                     )
                     single_table_rec_res["table_region_id"] = table_region_id
+                    if (
+                        use_table_orientation_classify == True
+                        and table_angle != "0"
+                    ):
+                        img_height_copy, img_width_copy = (
+                            doc_preprocessor_image_copy.shape[:2]
+                        )
+                        single_table_rec_res["cell_box_list"] = (
+                            self.map_cells_to_original_image(
+                                single_table_rec_res["cell_box_list"],
+                                table_angle,
+                                img_width_copy,
+                                img_height_copy,
+                            )
+                        )
                     table_res_list.append(single_table_rec_res)
                     table_region_id += 1

@@ -789,4 +1379,17 @@ class TableRecognitionPipelineV2(BasePipeline):
             "table_res_list": table_res_list,
             "model_settings": model_settings,
         }
+
         yield TableRecognitionResult(single_img_res)
+
+
+@pipeline_requires_extra("ocr")
+class TableRecognitionPipelineV2(AutoParallelImageSimpleInferencePipeline):
+    entities = ["table_recognition_v2"]
+
+    @property
+    def _pipeline_cls(self):
+        return _TableRecognitionPipelineV2
+
+    def _get_batch_size(self, config):
+        return 1
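
For orientation, a minimal usage sketch of the reworked table_recognition_v2 pipeline follows. It assumes the standard paddlex.create_pipeline entry point; the input path "table.jpg" and the res.print() call are illustrative placeholders, while the keyword flags are the ones introduced in the diff above.

# Hedged sketch only: exercises the new predict() flags surfaced in this diff.
from paddlex import create_pipeline

pipeline = create_pipeline(pipeline="table_recognition_v2")

output = pipeline.predict(
    "table.jpg",  # placeholder input image
    use_table_orientation_classify=True,  # rotate 90/180/270 tables upright before recognition
    use_ocr_results_with_table_cells=True,  # split OCR boxes that span multiple detected cells
    use_wired_table_cells_trans_to_html=False,  # build HTML directly from wired-table cell boxes
    use_wireless_table_cells_trans_to_html=False,  # same, for wireless tables
)
for res in output:
    res.print()  # assuming the usual PaddleX result helpers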