paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +17 -34
- paddlex/__main__.py +1 -1
- paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee-2B.yaml +14 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee-7B.yaml +14 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
- paddlex/configs/modules/open_vocabulary_detection/YOLO-Worldv2-L.yaml +13 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
- paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
- paddlex/configs/pipelines/OCR.yaml +7 -6
- paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
- paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
- paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
- paddlex/configs/pipelines/anomaly_detection.yaml +1 -1
- paddlex/configs/pipelines/doc_understanding.yaml +9 -0
- paddlex/configs/pipelines/formula_recognition.yaml +2 -2
- paddlex/configs/pipelines/layout_parsing.yaml +3 -2
- paddlex/configs/pipelines/seal_recognition.yaml +1 -0
- paddlex/configs/pipelines/table_recognition.yaml +2 -1
- paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
- paddlex/configs/pipelines/ts_anomaly_detection.yaml +1 -1
- paddlex/configs/pipelines/ts_classification.yaml +1 -1
- paddlex/configs/pipelines/ts_forecast.yaml +1 -1
- paddlex/constants.py +17 -0
- paddlex/engine.py +7 -5
- paddlex/hpip_links.html +23 -11
- paddlex/inference/__init__.py +3 -3
- paddlex/inference/common/__init__.py +1 -1
- paddlex/inference/common/batch_sampler/__init__.py +5 -4
- paddlex/inference/common/batch_sampler/audio_batch_sampler.py +5 -6
- paddlex/inference/common/batch_sampler/base_batch_sampler.py +20 -16
- paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +4 -7
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +87 -0
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +45 -60
- paddlex/inference/common/batch_sampler/ts_batch_sampler.py +9 -10
- paddlex/inference/common/batch_sampler/video_batch_sampler.py +2 -22
- paddlex/inference/common/reader/__init__.py +4 -4
- paddlex/inference/common/reader/audio_reader.py +3 -3
- paddlex/inference/common/reader/det_3d_reader.py +7 -5
- paddlex/inference/common/reader/image_reader.py +16 -12
- paddlex/inference/common/reader/ts_reader.py +3 -2
- paddlex/inference/common/reader/video_reader.py +3 -3
- paddlex/inference/common/result/__init__.py +7 -7
- paddlex/inference/common/result/base_cv_result.py +12 -2
- paddlex/inference/common/result/base_result.py +7 -5
- paddlex/inference/common/result/base_ts_result.py +1 -2
- paddlex/inference/common/result/base_video_result.py +2 -2
- paddlex/inference/common/result/mixin.py +31 -25
- paddlex/inference/models/__init__.py +41 -85
- paddlex/inference/models/anomaly_detection/__init__.py +1 -1
- paddlex/inference/models/anomaly_detection/predictor.py +9 -19
- paddlex/inference/models/anomaly_detection/processors.py +9 -2
- paddlex/inference/models/anomaly_detection/result.py +3 -2
- paddlex/inference/models/base/__init__.py +2 -2
- paddlex/inference/models/base/predictor/__init__.py +1 -2
- paddlex/inference/models/base/predictor/base_predictor.py +278 -39
- paddlex/inference/models/common/__init__.py +6 -15
- paddlex/inference/models/common/static_infer.py +724 -251
- paddlex/inference/models/common/tokenizer/__init__.py +7 -3
- paddlex/inference/models/common/tokenizer/bert_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +609 -0
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +9 -7
- paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +438 -0
- paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +85 -77
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +339 -123
- paddlex/inference/models/common/tokenizer/utils.py +1 -1
- paddlex/inference/models/common/tokenizer/vocab.py +8 -8
- paddlex/inference/models/common/ts/__init__.py +1 -1
- paddlex/inference/models/common/ts/funcs.py +13 -6
- paddlex/inference/models/common/ts/processors.py +14 -5
- paddlex/inference/models/common/vision/__init__.py +3 -3
- paddlex/inference/models/common/vision/funcs.py +17 -12
- paddlex/inference/models/common/vision/processors.py +61 -46
- paddlex/inference/models/common/vlm/__init__.py +13 -0
- paddlex/inference/models/common/vlm/activations.py +189 -0
- paddlex/inference/models/common/vlm/bert_padding.py +127 -0
- paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
- paddlex/inference/models/common/vlm/distributed.py +229 -0
- paddlex/inference/models/common/vlm/flash_attn_utils.py +119 -0
- paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
- paddlex/inference/models/common/vlm/generation/__init__.py +34 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +533 -0
- paddlex/inference/models/common/vlm/generation/logits_process.py +730 -0
- paddlex/inference/models/common/vlm/generation/stopping_criteria.py +106 -0
- paddlex/inference/models/common/vlm/generation/utils.py +2162 -0
- paddlex/inference/models/common/vlm/transformers/__init__.py +16 -0
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +1037 -0
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +408 -0
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +1612 -0
- paddlex/inference/models/common/vlm/transformers/model_utils.py +2014 -0
- paddlex/inference/models/common/vlm/transformers/utils.py +178 -0
- paddlex/inference/models/common/vlm/utils.py +109 -0
- paddlex/inference/models/doc_vlm/__init__.py +15 -0
- paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +17 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +2495 -0
- paddlex/inference/models/doc_vlm/predictor.py +253 -0
- paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +17 -0
- paddlex/inference/models/doc_vlm/processors/common.py +561 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +543 -0
- paddlex/inference/models/doc_vlm/result.py +21 -0
- paddlex/inference/models/face_feature/__init__.py +1 -1
- paddlex/inference/models/face_feature/predictor.py +2 -1
- paddlex/inference/models/formula_recognition/__init__.py +1 -1
- paddlex/inference/models/formula_recognition/predictor.py +18 -28
- paddlex/inference/models/formula_recognition/processors.py +126 -97
- paddlex/inference/models/formula_recognition/result.py +43 -35
- paddlex/inference/models/image_classification/__init__.py +1 -1
- paddlex/inference/models/image_classification/predictor.py +9 -19
- paddlex/inference/models/image_classification/processors.py +4 -2
- paddlex/inference/models/image_classification/result.py +4 -3
- paddlex/inference/models/image_feature/__init__.py +1 -1
- paddlex/inference/models/image_feature/predictor.py +9 -19
- paddlex/inference/models/image_feature/processors.py +7 -5
- paddlex/inference/models/image_feature/result.py +2 -3
- paddlex/inference/models/image_multilabel_classification/__init__.py +1 -1
- paddlex/inference/models/image_multilabel_classification/predictor.py +7 -6
- paddlex/inference/models/image_multilabel_classification/processors.py +6 -2
- paddlex/inference/models/image_multilabel_classification/result.py +4 -3
- paddlex/inference/models/image_unwarping/__init__.py +1 -1
- paddlex/inference/models/image_unwarping/predictor.py +8 -16
- paddlex/inference/models/image_unwarping/processors.py +6 -2
- paddlex/inference/models/image_unwarping/result.py +4 -2
- paddlex/inference/models/instance_segmentation/__init__.py +1 -1
- paddlex/inference/models/instance_segmentation/predictor.py +7 -15
- paddlex/inference/models/instance_segmentation/processors.py +4 -7
- paddlex/inference/models/instance_segmentation/result.py +11 -10
- paddlex/inference/models/keypoint_detection/__init__.py +1 -1
- paddlex/inference/models/keypoint_detection/predictor.py +5 -3
- paddlex/inference/models/keypoint_detection/processors.py +11 -3
- paddlex/inference/models/keypoint_detection/result.py +9 -4
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/predictor.py +15 -26
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/processors.py +26 -14
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/result.py +15 -12
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/visualizer_3d.py +77 -39
- paddlex/inference/models/multilingual_speech_recognition/__init__.py +1 -1
- paddlex/inference/models/multilingual_speech_recognition/predictor.py +11 -15
- paddlex/inference/models/multilingual_speech_recognition/processors.py +45 -53
- paddlex/inference/models/multilingual_speech_recognition/result.py +1 -1
- paddlex/inference/models/object_detection/__init__.py +1 -1
- paddlex/inference/models/object_detection/predictor.py +8 -12
- paddlex/inference/models/object_detection/processors.py +63 -33
- paddlex/inference/models/object_detection/result.py +5 -4
- paddlex/inference/models/object_detection/utils.py +3 -1
- paddlex/inference/models/open_vocabulary_detection/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_detection/predictor.py +31 -14
- paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +3 -2
- paddlex/inference/models/open_vocabulary_detection/processors/common.py +114 -0
- paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +19 -8
- paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py +209 -0
- paddlex/inference/models/open_vocabulary_segmentation/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/predictor.py +6 -13
- paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +12 -12
- paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +11 -9
- paddlex/inference/models/semantic_segmentation/__init__.py +1 -1
- paddlex/inference/models/semantic_segmentation/predictor.py +9 -18
- paddlex/inference/models/semantic_segmentation/processors.py +11 -8
- paddlex/inference/models/semantic_segmentation/result.py +4 -3
- paddlex/inference/models/table_structure_recognition/__init__.py +1 -1
- paddlex/inference/models/table_structure_recognition/predictor.py +8 -18
- paddlex/inference/models/table_structure_recognition/processors.py +23 -29
- paddlex/inference/models/table_structure_recognition/result.py +8 -15
- paddlex/inference/models/text_detection/__init__.py +1 -1
- paddlex/inference/models/text_detection/predictor.py +24 -24
- paddlex/inference/models/text_detection/processors.py +116 -44
- paddlex/inference/models/text_detection/result.py +8 -13
- paddlex/inference/models/text_recognition/__init__.py +1 -1
- paddlex/inference/models/text_recognition/predictor.py +11 -19
- paddlex/inference/models/text_recognition/processors.py +27 -13
- paddlex/inference/models/text_recognition/result.py +3 -2
- paddlex/inference/models/ts_anomaly_detection/__init__.py +1 -1
- paddlex/inference/models/ts_anomaly_detection/predictor.py +12 -17
- paddlex/inference/models/ts_anomaly_detection/processors.py +6 -2
- paddlex/inference/models/ts_anomaly_detection/result.py +21 -10
- paddlex/inference/models/ts_classification/__init__.py +1 -1
- paddlex/inference/models/ts_classification/predictor.py +14 -27
- paddlex/inference/models/ts_classification/processors.py +7 -2
- paddlex/inference/models/ts_classification/result.py +21 -12
- paddlex/inference/models/ts_forecasting/__init__.py +1 -1
- paddlex/inference/models/ts_forecasting/predictor.py +13 -18
- paddlex/inference/models/ts_forecasting/processors.py +12 -3
- paddlex/inference/models/ts_forecasting/result.py +24 -11
- paddlex/inference/models/video_classification/__init__.py +1 -1
- paddlex/inference/models/video_classification/predictor.py +9 -15
- paddlex/inference/models/video_classification/processors.py +24 -24
- paddlex/inference/models/video_classification/result.py +7 -3
- paddlex/inference/models/video_detection/__init__.py +1 -1
- paddlex/inference/models/video_detection/predictor.py +8 -15
- paddlex/inference/models/video_detection/processors.py +24 -11
- paddlex/inference/models/video_detection/result.py +10 -5
- paddlex/inference/pipelines/__init__.py +48 -37
- paddlex/inference/pipelines/_parallel.py +172 -0
- paddlex/inference/pipelines/anomaly_detection/__init__.py +1 -1
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +29 -9
- paddlex/inference/pipelines/attribute_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +24 -9
- paddlex/inference/pipelines/attribute_recognition/result.py +10 -8
- paddlex/inference/pipelines/base.py +43 -13
- paddlex/inference/pipelines/components/__init__.py +14 -8
- paddlex/inference/pipelines/components/chat_server/__init__.py +1 -1
- paddlex/inference/pipelines/components/chat_server/base.py +2 -2
- paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +8 -8
- paddlex/inference/pipelines/components/common/__init__.py +5 -4
- paddlex/inference/pipelines/components/common/base_operator.py +2 -1
- paddlex/inference/pipelines/components/common/base_result.py +3 -2
- paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +1 -2
- paddlex/inference/pipelines/components/common/crop_image_regions.py +11 -5
- paddlex/inference/pipelines/components/common/seal_det_warp.py +44 -13
- paddlex/inference/pipelines/components/common/sort_boxes.py +4 -2
- paddlex/inference/pipelines/components/common/warp_image.py +50 -0
- paddlex/inference/pipelines/components/faisser.py +10 -5
- paddlex/inference/pipelines/components/prompt_engineering/__init__.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/base.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +2 -1
- paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +2 -2
- paddlex/inference/pipelines/components/retriever/__init__.py +2 -2
- paddlex/inference/pipelines/components/retriever/base.py +18 -16
- paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +2 -2
- paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +87 -84
- paddlex/inference/pipelines/components/utils/__init__.py +1 -1
- paddlex/inference/pipelines/components/utils/mixin.py +7 -7
- paddlex/inference/pipelines/doc_preprocessor/__init__.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +70 -51
- paddlex/inference/pipelines/doc_preprocessor/result.py +5 -10
- paddlex/inference/pipelines/doc_understanding/__init__.py +15 -0
- paddlex/inference/pipelines/doc_understanding/pipeline.py +71 -0
- paddlex/inference/pipelines/face_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/face_recognition/pipeline.py +3 -1
- paddlex/inference/pipelines/face_recognition/result.py +3 -2
- paddlex/inference/pipelines/formula_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/formula_recognition/pipeline.py +137 -93
- paddlex/inference/pipelines/formula_recognition/result.py +20 -29
- paddlex/inference/pipelines/image_classification/__init__.py +1 -1
- paddlex/inference/pipelines/image_classification/pipeline.py +30 -11
- paddlex/inference/pipelines/image_multilabel_classification/__init__.py +1 -1
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +31 -12
- paddlex/inference/pipelines/instance_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +30 -9
- paddlex/inference/pipelines/keypoint_detection/__init__.py +1 -1
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/layout_parsing/__init__.py +1 -1
- paddlex/inference/pipelines/layout_parsing/pipeline.py +54 -56
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +904 -261
- paddlex/inference/pipelines/layout_parsing/result.py +9 -21
- paddlex/inference/pipelines/layout_parsing/result_v2.py +525 -250
- paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +570 -2004
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
- paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
- paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/pipeline.py +17 -10
- paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +17 -6
- paddlex/inference/pipelines/object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/object_detection/pipeline.py +29 -9
- paddlex/inference/pipelines/ocr/__init__.py +1 -1
- paddlex/inference/pipelines/ocr/pipeline.py +151 -77
- paddlex/inference/pipelines/ocr/result.py +31 -24
- paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +1 -1
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +17 -6
- paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +17 -6
- paddlex/inference/pipelines/pp_chatocr/__init__.py +1 -1
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +14 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +22 -14
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +34 -16
- paddlex/inference/pipelines/pp_shitu_v2/__init__.py +1 -1
- paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +12 -8
- paddlex/inference/pipelines/pp_shitu_v2/result.py +4 -4
- paddlex/inference/pipelines/rotated_object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/seal_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/seal_recognition/pipeline.py +127 -63
- paddlex/inference/pipelines/seal_recognition/result.py +4 -2
- paddlex/inference/pipelines/semantic_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +30 -9
- paddlex/inference/pipelines/small_object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/small_object_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/table_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/table_recognition/pipeline.py +61 -37
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +668 -65
- paddlex/inference/pipelines/table_recognition/result.py +12 -10
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +12 -8
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +55 -37
- paddlex/inference/pipelines/table_recognition/utils.py +1 -1
- paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +1 -1
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ts_classification/__init__.py +1 -1
- paddlex/inference/pipelines/ts_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/ts_forecasting/__init__.py +1 -1
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +16 -6
- paddlex/inference/pipelines/video_classification/__init__.py +1 -1
- paddlex/inference/pipelines/video_classification/pipeline.py +17 -6
- paddlex/inference/pipelines/video_detection/__init__.py +1 -1
- paddlex/inference/pipelines/video_detection/pipeline.py +20 -7
- paddlex/inference/serving/__init__.py +5 -1
- paddlex/inference/serving/basic_serving/__init__.py +1 -1
- paddlex/inference/serving/basic_serving/_app.py +31 -19
- paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +7 -4
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +12 -4
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +7 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py +153 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +13 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +10 -8
- paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +13 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +17 -14
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +16 -9
- paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +11 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +14 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_server.py +9 -4
- paddlex/inference/serving/infra/__init__.py +1 -1
- paddlex/inference/serving/infra/config.py +1 -1
- paddlex/inference/serving/infra/models.py +13 -6
- paddlex/inference/serving/infra/storage.py +9 -4
- paddlex/inference/serving/infra/utils.py +54 -28
- paddlex/inference/serving/schemas/__init__.py +1 -1
- paddlex/inference/serving/schemas/anomaly_detection.py +1 -1
- paddlex/inference/serving/schemas/doc_preprocessor.py +1 -1
- paddlex/inference/serving/schemas/doc_understanding.py +78 -0
- paddlex/inference/serving/schemas/face_recognition.py +1 -1
- paddlex/inference/serving/schemas/formula_recognition.py +2 -2
- paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -1
- paddlex/inference/serving/schemas/image_classification.py +1 -1
- paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -1
- paddlex/inference/serving/schemas/instance_segmentation.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +2 -3
- paddlex/inference/serving/schemas/m_3d_bev_detection.py +1 -1
- paddlex/inference/serving/schemas/multilingual_speech_recognition.py +1 -1
- paddlex/inference/serving/schemas/object_detection.py +1 -1
- paddlex/inference/serving/schemas/ocr.py +1 -1
- paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -1
- paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -1
- paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -1
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +2 -3
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +3 -3
- paddlex/inference/serving/schemas/pp_shituv2.py +1 -1
- paddlex/inference/serving/schemas/pp_structurev3.py +11 -7
- paddlex/inference/serving/schemas/rotated_object_detection.py +1 -1
- paddlex/inference/serving/schemas/seal_recognition.py +2 -2
- paddlex/inference/serving/schemas/semantic_segmentation.py +1 -1
- paddlex/inference/serving/schemas/shared/__init__.py +1 -1
- paddlex/inference/serving/schemas/shared/classification.py +1 -1
- paddlex/inference/serving/schemas/shared/image_segmentation.py +1 -1
- paddlex/inference/serving/schemas/shared/object_detection.py +1 -1
- paddlex/inference/serving/schemas/shared/ocr.py +1 -1
- paddlex/inference/serving/schemas/small_object_detection.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +3 -7
- paddlex/inference/serving/schemas/table_recognition_v2.py +6 -7
- paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -1
- paddlex/inference/serving/schemas/ts_classification.py +1 -1
- paddlex/inference/serving/schemas/ts_forecast.py +1 -1
- paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -1
- paddlex/inference/serving/schemas/video_classification.py +1 -1
- paddlex/inference/serving/schemas/video_detection.py +1 -1
- paddlex/inference/utils/__init__.py +1 -1
- paddlex/inference/utils/benchmark.py +332 -179
- paddlex/inference/utils/color_map.py +1 -1
- paddlex/inference/utils/get_pipeline_path.py +1 -1
- paddlex/inference/utils/hpi.py +258 -0
- paddlex/inference/utils/hpi_model_info_collection.json +2331 -0
- paddlex/inference/utils/io/__init__.py +11 -11
- paddlex/inference/utils/io/readers.py +31 -27
- paddlex/inference/utils/io/style.py +21 -14
- paddlex/inference/utils/io/tablepyxl.py +13 -5
- paddlex/inference/utils/io/writers.py +9 -10
- paddlex/inference/utils/mkldnn_blocklist.py +25 -0
- paddlex/inference/utils/model_paths.py +48 -0
- paddlex/inference/utils/{new_ir_blacklist.py → new_ir_blocklist.py} +1 -2
- paddlex/inference/utils/official_models.py +278 -262
- paddlex/inference/utils/pp_option.py +184 -92
- paddlex/inference/utils/trt_blocklist.py +43 -0
- paddlex/inference/utils/trt_config.py +420 -0
- paddlex/model.py +30 -12
- paddlex/modules/__init__.py +57 -80
- paddlex/modules/anomaly_detection/__init__.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/__init__.py +2 -3
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +6 -3
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +8 -4
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +7 -4
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +7 -2
- paddlex/modules/anomaly_detection/evaluator.py +3 -3
- paddlex/modules/anomaly_detection/exportor.py +1 -1
- paddlex/modules/anomaly_detection/model_list.py +1 -1
- paddlex/modules/anomaly_detection/trainer.py +3 -4
- paddlex/modules/base/__init__.py +5 -5
- paddlex/modules/base/build_model.py +1 -2
- paddlex/modules/base/dataset_checker/__init__.py +2 -2
- paddlex/modules/base/dataset_checker/dataset_checker.py +4 -4
- paddlex/modules/base/dataset_checker/utils.py +1 -3
- paddlex/modules/base/evaluator.py +13 -13
- paddlex/modules/base/exportor.py +12 -13
- paddlex/modules/base/trainer.py +21 -11
- paddlex/modules/base/utils/__init__.py +13 -0
- paddlex/modules/base/utils/cinn_setting.py +89 -0
- paddlex/modules/base/utils/coco_eval.py +94 -0
- paddlex/modules/base/utils/topk_eval.py +118 -0
- paddlex/modules/doc_vlm/__init__.py +18 -0
- paddlex/modules/doc_vlm/dataset_checker.py +29 -0
- paddlex/modules/doc_vlm/evaluator.py +29 -0
- paddlex/modules/doc_vlm/exportor.py +29 -0
- paddlex/modules/doc_vlm/model_list.py +16 -0
- paddlex/modules/doc_vlm/trainer.py +41 -0
- paddlex/modules/face_recognition/__init__.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/__init__.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +1 -1
- paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +3 -5
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/face_recognition/evaluator.py +3 -3
- paddlex/modules/face_recognition/exportor.py +1 -1
- paddlex/modules/face_recognition/model_list.py +1 -1
- paddlex/modules/face_recognition/trainer.py +1 -1
- paddlex/modules/formula_recognition/__init__.py +2 -2
- paddlex/modules/formula_recognition/dataset_checker/__init__.py +3 -3
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +2 -6
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
- paddlex/modules/formula_recognition/evaluator.py +6 -3
- paddlex/modules/formula_recognition/exportor.py +1 -1
- paddlex/modules/formula_recognition/model_list.py +4 -1
- paddlex/modules/formula_recognition/trainer.py +5 -3
- paddlex/modules/general_recognition/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +7 -9
- paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +4 -5
- paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +6 -5
- paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/general_recognition/evaluator.py +2 -2
- paddlex/modules/general_recognition/exportor.py +1 -1
- paddlex/modules/general_recognition/model_list.py +1 -1
- paddlex/modules/general_recognition/trainer.py +1 -1
- paddlex/modules/image_classification/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
- paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/image_classification/evaluator.py +3 -3
- paddlex/modules/image_classification/exportor.py +1 -1
- paddlex/modules/image_classification/model_list.py +2 -1
- paddlex/modules/image_classification/trainer.py +3 -3
- paddlex/modules/image_unwarping/__init__.py +1 -1
- paddlex/modules/image_unwarping/model_list.py +1 -1
- paddlex/modules/instance_segmentation/__init__.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/__init__.py +2 -3
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +9 -5
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +8 -5
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +8 -8
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +7 -4
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +10 -8
- paddlex/modules/instance_segmentation/evaluator.py +2 -2
- paddlex/modules/instance_segmentation/exportor.py +1 -1
- paddlex/modules/instance_segmentation/model_list.py +1 -1
- paddlex/modules/instance_segmentation/trainer.py +1 -1
- paddlex/modules/keypoint_detection/__init__.py +2 -2
- paddlex/modules/keypoint_detection/dataset_checker/__init__.py +2 -2
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +1 -1
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +8 -3
- paddlex/modules/keypoint_detection/evaluator.py +2 -2
- paddlex/modules/keypoint_detection/exportor.py +1 -1
- paddlex/modules/keypoint_detection/model_list.py +1 -1
- paddlex/modules/keypoint_detection/trainer.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/__init__.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/__init__.py +3 -3
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/analyse_dataset.py +8 -8
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/check_dataset.py +1 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/evaluator.py +3 -3
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/exportor.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/model_list.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/trainer.py +5 -7
- paddlex/modules/multilabel_classification/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +10 -7
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +1 -5
- paddlex/modules/multilabel_classification/evaluator.py +3 -3
- paddlex/modules/multilabel_classification/exportor.py +1 -1
- paddlex/modules/multilabel_classification/model_list.py +1 -1
- paddlex/modules/multilabel_classification/trainer.py +3 -3
- paddlex/modules/multilingual_speech_recognition/__init__.py +2 -2
- paddlex/modules/multilingual_speech_recognition/dataset_checker.py +3 -3
- paddlex/modules/multilingual_speech_recognition/evaluator.py +3 -3
- paddlex/modules/multilingual_speech_recognition/exportor.py +3 -3
- paddlex/modules/multilingual_speech_recognition/model_list.py +1 -1
- paddlex/modules/multilingual_speech_recognition/trainer.py +7 -5
- paddlex/modules/object_detection/__init__.py +2 -2
- paddlex/modules/object_detection/dataset_checker/__init__.py +2 -11
- paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +10 -8
- paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +17 -12
- paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +8 -4
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +9 -8
- paddlex/modules/object_detection/evaluator.py +11 -6
- paddlex/modules/object_detection/exportor.py +1 -1
- paddlex/modules/object_detection/model_list.py +3 -1
- paddlex/modules/object_detection/trainer.py +4 -5
- paddlex/modules/open_vocabulary_detection/__init__.py +2 -2
- paddlex/modules/open_vocabulary_detection/dataset_checker.py +3 -3
- paddlex/modules/open_vocabulary_detection/evaluator.py +3 -3
- paddlex/modules/open_vocabulary_detection/exportor.py +3 -3
- paddlex/modules/open_vocabulary_detection/model_list.py +2 -4
- paddlex/modules/open_vocabulary_detection/trainer.py +7 -5
- paddlex/modules/open_vocabulary_segmentation/__init__.py +2 -2
- paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/evaluator.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/exportor.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/model_list.py +1 -1
- paddlex/modules/open_vocabulary_segmentation/trainer.py +7 -5
- paddlex/modules/semantic_segmentation/__init__.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +2 -3
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +6 -3
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +7 -4
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +6 -2
- paddlex/modules/semantic_segmentation/evaluator.py +3 -3
- paddlex/modules/semantic_segmentation/exportor.py +1 -1
- paddlex/modules/semantic_segmentation/model_list.py +1 -1
- paddlex/modules/semantic_segmentation/trainer.py +3 -4
- paddlex/modules/table_recognition/__init__.py +2 -2
- paddlex/modules/table_recognition/dataset_checker/__init__.py +5 -5
- paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +3 -2
- paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +8 -7
- paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +2 -1
- paddlex/modules/table_recognition/evaluator.py +3 -3
- paddlex/modules/table_recognition/exportor.py +1 -1
- paddlex/modules/table_recognition/model_list.py +1 -1
- paddlex/modules/table_recognition/trainer.py +2 -5
- paddlex/modules/text_detection/__init__.py +2 -2
- paddlex/modules/text_detection/dataset_checker/__init__.py +4 -6
- paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +12 -9
- paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +3 -3
- paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +3 -3
- paddlex/modules/text_detection/evaluator.py +3 -3
- paddlex/modules/text_detection/exportor.py +1 -1
- paddlex/modules/text_detection/model_list.py +3 -1
- paddlex/modules/text_detection/trainer.py +2 -5
- paddlex/modules/text_recognition/__init__.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/__init__.py +4 -5
- paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
- paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +2 -5
- paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
- paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
- paddlex/modules/text_recognition/evaluator.py +3 -3
- paddlex/modules/text_recognition/exportor.py +1 -1
- paddlex/modules/text_recognition/model_list.py +3 -1
- paddlex/modules/text_recognition/trainer.py +2 -3
- paddlex/modules/ts_anomaly_detection/__init__.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +1 -9
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_anomaly_detection/evaluator.py +3 -3
- paddlex/modules/ts_anomaly_detection/exportor.py +2 -3
- paddlex/modules/ts_anomaly_detection/model_list.py +1 -1
- paddlex/modules/ts_anomaly_detection/trainer.py +8 -8
- paddlex/modules/ts_classification/__init__.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -5
- paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +5 -5
- paddlex/modules/ts_classification/evaluator.py +3 -3
- paddlex/modules/ts_classification/exportor.py +2 -3
- paddlex/modules/ts_classification/model_list.py +1 -1
- paddlex/modules/ts_classification/trainer.py +7 -7
- paddlex/modules/ts_forecast/__init__.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +1 -9
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_forecast/evaluator.py +3 -3
- paddlex/modules/ts_forecast/exportor.py +2 -3
- paddlex/modules/ts_forecast/model_list.py +1 -1
- paddlex/modules/ts_forecast/trainer.py +7 -7
- paddlex/modules/video_classification/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +9 -9
- paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +2 -3
- paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/video_classification/evaluator.py +3 -3
- paddlex/modules/video_classification/exportor.py +1 -1
- paddlex/modules/video_classification/model_list.py +1 -1
- paddlex/modules/video_classification/trainer.py +3 -3
- paddlex/modules/video_detection/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +3 -5
- paddlex/modules/video_detection/evaluator.py +3 -3
- paddlex/modules/video_detection/exportor.py +1 -1
- paddlex/modules/video_detection/model_list.py +1 -1
- paddlex/modules/video_detection/trainer.py +3 -3
- paddlex/ops/__init__.py +7 -4
- paddlex/ops/iou3d_nms/iou3d_cpu.cpp +8 -6
- paddlex/ops/iou3d_nms/iou3d_cpu.h +3 -2
- paddlex/ops/iou3d_nms/iou3d_nms.cpp +8 -6
- paddlex/ops/iou3d_nms/iou3d_nms.h +6 -4
- paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +24 -18
- paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +9 -7
- paddlex/ops/setup.py +3 -3
- paddlex/ops/voxel/voxelize_op.cc +22 -19
- paddlex/ops/voxel/voxelize_op.cu +25 -25
- paddlex/paddlex_cli.py +104 -87
- paddlex/repo_apis/Paddle3D_api/__init__.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +6 -6
- paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +2 -2
- paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +1 -1
- paddlex/repo_apis/Paddle3D_api/pp3d_config.py +3 -2
- paddlex/repo_apis/PaddleClas_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/__init__.py +3 -3
- paddlex/repo_apis/PaddleClas_api/cls/config.py +5 -4
- paddlex/repo_apis/PaddleClas_api/cls/model.py +4 -4
- paddlex/repo_apis/PaddleClas_api/cls/register.py +12 -3
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +2 -3
- paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +1 -4
- paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +1 -6
- paddlex/repo_apis/PaddleDetection_api/__init__.py +2 -2
- paddlex/repo_apis/PaddleDetection_api/config_helper.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +2 -2
- paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +4 -4
- paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +5 -4
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +6 -7
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +26 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +32 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +2 -3
- paddlex/repo_apis/PaddleNLP_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/__init__.py +4 -3
- paddlex/repo_apis/PaddleOCR_api/config_utils.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +7 -6
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +9 -13
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +29 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +4 -4
- paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/config.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +4 -4
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +20 -3
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +7 -6
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +9 -13
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +20 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +2 -3
- paddlex/repo_apis/PaddleSeg_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +2 -2
- paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/config.py +3 -6
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +6 -6
- paddlex/repo_apis/PaddleSeg_api/seg/register.py +2 -3
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +2 -3
- paddlex/repo_apis/PaddleTS_api/__init__.py +4 -3
- paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +5 -6
- paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_base/config.py +2 -4
- paddlex/repo_apis/PaddleTS_api/ts_base/model.py +4 -4
- paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +4 -5
- paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +6 -7
- paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/config_utils.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +5 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +5 -4
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +5 -5
- paddlex/repo_apis/PaddleVideo_api/video_det/register.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +2 -3
- paddlex/repo_apis/__init__.py +1 -1
- paddlex/repo_apis/base/__init__.py +4 -5
- paddlex/repo_apis/base/config.py +3 -4
- paddlex/repo_apis/base/model.py +11 -19
- paddlex/repo_apis/base/register.py +1 -1
- paddlex/repo_apis/base/runner.py +11 -12
- paddlex/repo_apis/base/utils/__init__.py +1 -1
- paddlex/repo_apis/base/utils/arg.py +1 -1
- paddlex/repo_apis/base/utils/subprocess.py +1 -1
- paddlex/repo_manager/__init__.py +2 -9
- paddlex/repo_manager/core.py +12 -30
- paddlex/repo_manager/meta.py +41 -31
- paddlex/repo_manager/repo.py +171 -161
- paddlex/repo_manager/utils.py +13 -224
- paddlex/utils/__init__.py +1 -1
- paddlex/utils/cache.py +8 -10
- paddlex/utils/config.py +6 -5
- paddlex/utils/{custom_device_whitelist.py → custom_device_list.py} +53 -199
- paddlex/utils/deps.py +249 -0
- paddlex/utils/device.py +87 -36
- paddlex/utils/download.py +4 -4
- paddlex/utils/env.py +37 -7
- paddlex/utils/errors/__init__.py +1 -1
- paddlex/utils/errors/dataset_checker.py +1 -1
- paddlex/utils/errors/others.py +2 -16
- paddlex/utils/file_interface.py +4 -5
- paddlex/utils/flags.py +17 -12
- paddlex/utils/fonts/__init__.py +36 -5
- paddlex/utils/func_register.py +1 -1
- paddlex/utils/install.py +87 -0
- paddlex/utils/interactive_get_pipeline.py +3 -3
- paddlex/utils/lazy_loader.py +3 -3
- paddlex/utils/logging.py +10 -1
- paddlex/utils/misc.py +6 -6
- paddlex/utils/pipeline_arguments.py +15 -7
- paddlex/utils/result_saver.py +4 -5
- paddlex/utils/subclass_register.py +2 -4
- paddlex/version.py +2 -1
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/METADATA +237 -102
- paddlex-3.0.1.dist-info/RECORD +1095 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
- paddlex/inference/models/base/predictor/basic_predictor.py +0 -139
- paddlex/paddle2onnx_requirements.txt +0 -1
- paddlex/repo_manager/requirements.txt +0 -21
- paddlex/serving_requirements.txt +0 -9
- paddlex-3.0.0rc0.dist-info/RECORD +0 -1015
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info/licenses}/LICENSE +0 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -14,22 +14,20 @@
|
|
14
14
|
|
15
15
|
__all__ = [
|
16
16
|
"get_sub_regions_ocr_res",
|
17
|
-
"get_layout_ordering",
|
18
|
-
"get_single_block_parsing_res",
|
19
17
|
"get_show_color",
|
20
18
|
"sorted_layout_boxes",
|
21
19
|
]
|
22
20
|
|
23
|
-
import numpy as np
|
24
|
-
from PIL import Image
|
25
|
-
import uuid
|
26
21
|
import re
|
27
|
-
from pathlib import Path
|
28
22
|
from copy import deepcopy
|
29
|
-
from typing import
|
30
|
-
|
31
|
-
|
23
|
+
from typing import Dict, List, Optional, Tuple, Union
|
24
|
+
|
25
|
+
import numpy as np
|
26
|
+
from PIL import Image
|
27
|
+
|
32
28
|
from ..components import convert_points_to_boxes
|
29
|
+
from ..ocr.result import OCRResult
|
30
|
+
from .setting import BLOCK_LABEL_MAP, REGION_SETTINGS
|
33
31
|
|
34
32
|
|
35
33
|
def get_overlap_boxes_idx(src_boxes: np.ndarray, ref_boxes: np.ndarray) -> List:
|
@@ -173,808 +171,453 @@ def sorted_layout_boxes(res, w):
|
|
173
171
|
return new_res
|
174
172
|
|
175
173
|
|
176
|
-
def
|
177
|
-
bbox1:
|
178
|
-
bbox2:
|
174
|
+
def calculate_projection_overlap_ratio(
|
175
|
+
bbox1: List[float],
|
176
|
+
bbox2: List[float],
|
177
|
+
direction: str = "horizontal",
|
178
|
+
mode="union",
|
179
179
|
) -> float:
|
180
180
|
"""
|
181
|
-
Calculate the
|
182
|
-
to the area of the smaller bounding box.
|
181
|
+
Calculate the IoU of lines between two bounding boxes.
|
183
182
|
|
184
183
|
Args:
|
185
|
-
bbox1 (
|
186
|
-
bbox2 (
|
184
|
+
bbox1 (List[float]): First bounding box [x_min, y_min, x_max, y_max].
|
185
|
+
bbox2 (List[float]): Second bounding box [x_min, y_min, x_max, y_max].
|
186
|
+
direction (str): direction of the projection, "horizontal" or "vertical".
|
187
187
|
|
188
188
|
Returns:
|
189
|
-
float:
|
190
|
-
"""
|
191
|
-
bbox1 = list(map(int, bbox1))
|
192
|
-
bbox2 = list(map(int, bbox2))
|
193
|
-
|
194
|
-
x_left = max(bbox1[0], bbox2[0])
|
195
|
-
y_top = max(bbox1[1], bbox2[1])
|
196
|
-
x_right = min(bbox1[2], bbox2[2])
|
197
|
-
y_bottom = min(bbox1[3], bbox2[3])
|
198
|
-
|
199
|
-
if x_right <= x_left or y_bottom <= y_top:
|
200
|
-
return 0.0
|
201
|
-
|
202
|
-
intersection_area = (x_right - x_left) * (y_bottom - y_top)
|
203
|
-
area_bbox1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
|
204
|
-
area_bbox2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
|
205
|
-
min_box_area = min(area_bbox1, area_bbox2)
|
206
|
-
|
207
|
-
if min_box_area <= 0:
|
208
|
-
return 0.0
|
209
|
-
|
210
|
-
return intersection_area / min_box_area
|
211
|
-
|
212
|
-
|
213
|
-
def _whether_y_overlap_exceeds_threshold(
|
214
|
-
bbox1: Union[list, tuple],
|
215
|
-
bbox2: Union[list, tuple],
|
216
|
-
overlap_ratio_threshold: float = 0.6,
|
217
|
-
) -> bool:
|
189
|
+
float: Line overlap ratio. Returns 0 if there is no overlap.
|
218
190
|
"""
|
219
|
-
|
191
|
+
start_index, end_index = 1, 3
|
192
|
+
if direction == "horizontal":
|
193
|
+
start_index, end_index = 0, 2
|
220
194
|
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
Returns:
|
228
|
-
bool: True if the vertical overlap divided by the minimum height of the two bounding boxes
|
229
|
-
exceeds the overlap_ratio_threshold, otherwise False.
|
230
|
-
"""
|
231
|
-
_, y1_0, _, y1_1 = bbox1
|
232
|
-
_, y2_0, _, y2_1 = bbox2
|
195
|
+
intersection_start = max(bbox1[start_index], bbox2[start_index])
|
196
|
+
intersection_end = min(bbox1[end_index], bbox2[end_index])
|
197
|
+
overlap = intersection_end - intersection_start
|
198
|
+
if overlap <= 0:
|
199
|
+
return 0
|
233
200
|
|
234
|
-
|
235
|
-
|
201
|
+
if mode == "union":
|
202
|
+
ref_width = max(bbox1[end_index], bbox2[end_index]) - min(
|
203
|
+
bbox1[start_index], bbox2[start_index]
|
204
|
+
)
|
205
|
+
elif mode == "small":
|
206
|
+
ref_width = min(
|
207
|
+
bbox1[end_index] - bbox1[start_index], bbox2[end_index] - bbox2[start_index]
|
208
|
+
)
|
209
|
+
elif mode == "large":
|
210
|
+
ref_width = max(
|
211
|
+
bbox1[end_index] - bbox1[start_index], bbox2[end_index] - bbox2[start_index]
|
212
|
+
)
|
213
|
+
else:
|
214
|
+
raise ValueError(
|
215
|
+
f"Invalid mode {mode}, must be one of ['union', 'small', 'large']."
|
216
|
+
)
|
236
217
|
|
237
|
-
return
|
218
|
+
return overlap / ref_width if ref_width > 0 else 0.0
|
238
219
|
|
239
220
|
|
240
|
-
def
|
221
|
+
def calculate_overlap_ratio(
|
222
|
+
bbox1: Union[list, tuple], bbox2: Union[list, tuple], mode="union"
|
223
|
+
) -> float:
|
241
224
|
"""
|
242
|
-
|
225
|
+
Calculate the overlap ratio between two bounding boxes.
|
243
226
|
|
244
227
|
Args:
|
245
|
-
|
246
|
-
|
247
|
-
|
228
|
+
bbox1 (list or tuple): The first bounding box, format [x_min, y_min, x_max, y_max]
|
229
|
+
bbox2 (list or tuple): The second bounding box, format [x_min, y_min, x_max, y_max]
|
230
|
+
mode (str): The mode of calculation, either 'union', 'small', or 'large'.
|
248
231
|
|
249
232
|
Returns:
|
250
|
-
|
233
|
+
float: The overlap ratio value between the two bounding boxes
|
251
234
|
"""
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
return span
|
235
|
+
x_min_inter = max(bbox1[0], bbox2[0])
|
236
|
+
y_min_inter = max(bbox1[1], bbox2[1])
|
237
|
+
x_max_inter = min(bbox1[2], bbox2[2])
|
238
|
+
y_max_inter = min(bbox1[3], bbox2[3])
|
257
239
|
|
240
|
+
inter_width = max(0, x_max_inter - x_min_inter)
|
241
|
+
inter_height = max(0, y_max_inter - y_min_inter)
|
258
242
|
|
259
|
-
|
260
|
-
line: List[List[Union[List[int], str]]],
|
261
|
-
layout_min: int,
|
262
|
-
layout_max: int,
|
263
|
-
is_reference: bool = False,
|
264
|
-
) -> None:
|
265
|
-
"""
|
266
|
-
Format a line of text spans based on layout constraints.
|
243
|
+
inter_area = inter_width * inter_height
|
267
244
|
|
268
|
-
|
269
|
-
|
270
|
-
layout_min (int): The minimum x-coordinate of the layout bounding box.
|
271
|
-
layout_max (int): The maximum x-coordinate of the layout bounding box.
|
272
|
-
is_reference (bool): A flag indicating whether the line is a reference line, which affects formatting rules.
|
245
|
+
bbox1_area = caculate_bbox_area(bbox1)
|
246
|
+
bbox2_area = caculate_bbox_area(bbox2)
|
273
247
|
|
274
|
-
|
275
|
-
|
276
|
-
""
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
if not is_reference:
|
281
|
-
if first_span[0][0] - layout_min > 10:
|
282
|
-
first_span = _adjust_span_text(first_span, prepend=True)
|
283
|
-
if layout_max - end_span[0][2] > 10:
|
284
|
-
end_span = _adjust_span_text(end_span, append=True)
|
248
|
+
if mode == "union":
|
249
|
+
ref_area = bbox1_area + bbox2_area - inter_area
|
250
|
+
elif mode == "small":
|
251
|
+
ref_area = min(bbox1_area, bbox2_area)
|
252
|
+
elif mode == "large":
|
253
|
+
ref_area = max(bbox1_area, bbox2_area)
|
285
254
|
else:
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
end_span = _adjust_span_text(end_span, append=True)
|
255
|
+
raise ValueError(
|
256
|
+
f"Invalid mode {mode}, must be one of ['union', 'small', 'large']."
|
257
|
+
)
|
290
258
|
|
291
|
-
|
292
|
-
|
259
|
+
if ref_area == 0:
|
260
|
+
return 0.0
|
293
261
|
|
294
|
-
return
|
262
|
+
return inter_area / ref_area
|
295
263
|
|
296
264
|
|
297
|
-
def
|
298
|
-
""
|
299
|
-
|
300
|
-
|
265
|
+
def group_boxes_into_lines(ocr_rec_res, line_height_iou_threshold):
|
266
|
+
rec_boxes = ocr_rec_res["boxes"]
|
267
|
+
rec_texts = ocr_rec_res["rec_texts"]
|
268
|
+
rec_labels = ocr_rec_res["rec_labels"]
|
301
269
|
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
A new list of boxes, including split boxes, with the same `rec_text` and `label` attributes.
|
307
|
-
"""
|
270
|
+
text_boxes = [
|
271
|
+
rec_boxes[i] for i in range(len(rec_boxes)) if rec_labels[i] == "text"
|
272
|
+
]
|
273
|
+
text_orientation = calculate_text_orientation(text_boxes)
|
308
274
|
|
309
|
-
|
310
|
-
"""Check if box_a completely contains box_b in the x-direction."""
|
311
|
-
return box_a[0][0] <= box_b[0][0] and box_a[0][2] >= box_b[0][2]
|
275
|
+
match_direction = "vertical" if text_orientation == "horizontal" else "horizontal"
|
312
276
|
|
313
|
-
|
277
|
+
line_start_index = 1 if text_orientation == "horizontal" else 0
|
278
|
+
line_end_index = 3 if text_orientation == "horizontal" else 2
|
314
279
|
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
is_split = True
|
324
|
-
# Split box_a based on the x-coordinates of box_b
|
325
|
-
if box_a[0][0] < box_b[0][0]:
|
326
|
-
w = box_b[0][0] - offset - box_a[0][0]
|
327
|
-
if w > 1:
|
328
|
-
new_boxes.append(
|
329
|
-
[
|
330
|
-
np.array(
|
331
|
-
[
|
332
|
-
box_a[0][0],
|
333
|
-
box_a[0][1],
|
334
|
-
box_b[0][0] - offset,
|
335
|
-
box_a[0][3],
|
336
|
-
]
|
337
|
-
),
|
338
|
-
box_a[1],
|
339
|
-
box_a[2],
|
340
|
-
]
|
341
|
-
)
|
342
|
-
if box_a[0][2] > box_b[0][2]:
|
343
|
-
w = box_a[0][2] - box_b[0][2] + offset
|
344
|
-
if w > 1:
|
345
|
-
box_a = [
|
346
|
-
np.array(
|
347
|
-
[
|
348
|
-
box_b[0][2] + offset,
|
349
|
-
box_a[0][1],
|
350
|
-
box_a[0][2],
|
351
|
-
box_a[0][3],
|
352
|
-
]
|
353
|
-
),
|
354
|
-
box_a[1],
|
355
|
-
box_a[2],
|
356
|
-
]
|
357
|
-
if j == len(boxes) - 1 and is_split:
|
358
|
-
new_boxes.append(box_a)
|
359
|
-
if not is_split:
|
360
|
-
new_boxes.append(box_a)
|
361
|
-
|
362
|
-
return new_boxes
|
280
|
+
spans = list(zip(rec_boxes, rec_texts, rec_labels))
|
281
|
+
sort_index = 1
|
282
|
+
reverse = False
|
283
|
+
if text_orientation == "vertical":
|
284
|
+
sort_index = 0
|
285
|
+
reverse = True
|
286
|
+
spans.sort(key=lambda span: span[0][sort_index], reverse=reverse)
|
287
|
+
spans = [list(span) for span in spans]
|
363
288
|
|
289
|
+
lines = []
|
290
|
+
line = [spans[0]]
|
291
|
+
line_region_box = spans[0][0].copy()
|
292
|
+
line_heights = []
|
293
|
+
# merge line
|
294
|
+
for span in spans[1:]:
|
295
|
+
rec_bbox = span[0]
|
296
|
+
if (
|
297
|
+
calculate_projection_overlap_ratio(
|
298
|
+
line_region_box, rec_bbox, match_direction, mode="small"
|
299
|
+
)
|
300
|
+
>= line_height_iou_threshold
|
301
|
+
):
|
302
|
+
line.append(span)
|
303
|
+
line_region_box[line_start_index] = min(
|
304
|
+
line_region_box[line_start_index], rec_bbox[line_start_index]
|
305
|
+
)
|
306
|
+
line_region_box[line_end_index] = max(
|
307
|
+
line_region_box[line_end_index], rec_bbox[line_end_index]
|
308
|
+
)
|
309
|
+
else:
|
310
|
+
line_heights.append(
|
311
|
+
line_region_box[line_end_index] - line_region_box[line_start_index]
|
312
|
+
)
|
313
|
+
lines.append(line)
|
314
|
+
line = [span]
|
315
|
+
line_region_box = rec_bbox.copy()
|
364
316
|
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
) -> None:
|
370
|
-
"""
|
371
|
-
Sort a line of text spans based on their vertical position within the layout bounding box.
|
317
|
+
lines.append(line)
|
318
|
+
line_heights.append(
|
319
|
+
line_region_box[line_end_index] - line_region_box[line_start_index]
|
320
|
+
)
|
372
321
|
|
373
|
-
|
374
|
-
|
375
|
-
general_ocr_pipeline (Any): The general OCR pipeline used for text recognition.
|
376
|
-
line (list): A list of spans, where each span is a list containing a bounding box and text.
|
322
|
+
min_height = min(line_heights) if line_heights else 0
|
323
|
+
max_height = max(line_heights) if line_heights else 0
|
377
324
|
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
splited_boxes.sort(key=lambda span: span[0][0])
|
385
|
-
text_rec_model = general_ocr_pipeline.text_rec_model
|
386
|
-
for span in splited_boxes:
|
387
|
-
if span[2] == "text":
|
388
|
-
crop_img = input_img[
|
389
|
-
int(span[0][1]) : int(span[0][3]),
|
390
|
-
int(span[0][0]) : int(span[0][2]),
|
391
|
-
]
|
392
|
-
span[1] = next(text_rec_model([crop_img]))["rec_text"]
|
393
|
-
splited_lines.append(span)
|
394
|
-
else:
|
395
|
-
splited_lines = line
|
325
|
+
if max_height > min_height * 2 and text_orientation == "vertical":
|
326
|
+
line_heights = np.array(line_heights)
|
327
|
+
min_height_num = np.sum(line_heights < min_height * 1.1)
|
328
|
+
if min_height_num < len(lines) * 0.4:
|
329
|
+
condition = line_heights > min_height * 1.1
|
330
|
+
lines = [value for value, keep in zip(lines, condition) if keep]
|
396
331
|
|
397
|
-
return
|
332
|
+
return lines, text_orientation, np.mean(line_heights)
|
398
333
|
|
399
334
|
|
400
|
-
def
|
401
|
-
input_img: np.ndarray,
|
402
|
-
general_ocr_pipeline: Any,
|
403
|
-
label: Any,
|
404
|
-
block_bbox: Tuple[int, int, int, int],
|
405
|
-
ocr_res: Dict[str, List[Any]],
|
406
|
-
line_height_iou_threshold: float = 0.7,
|
407
|
-
) -> Dict[str, List[Any]]:
|
335
|
+
def calculate_minimum_enclosing_bbox(bboxes):
|
408
336
|
"""
|
409
|
-
|
337
|
+
Calculate the minimum enclosing bounding box for a list of bounding boxes.
|
410
338
|
|
411
339
|
Args:
|
412
|
-
|
413
|
-
general_ocr_pipeline (Any): The general OCR pipeline used for text recognition.
|
414
|
-
label (Any): The label associated with the OCR results. It's not used in the function but might be
|
415
|
-
relevant for other parts of the calling context.
|
416
|
-
block_bbox (Tuple[int, int, int, int]): A tuple representing the layout bounding box, defined as
|
417
|
-
(left, top, right, bottom).
|
418
|
-
ocr_res (Dict[str, List[Any]]): A dictionary containing OCR results with the following keys:
|
419
|
-
- "boxes": A list of bounding boxes, each defined as [left, top, right, bottom].
|
420
|
-
- "rec_texts": A corresponding list of recognized text strings for each box.
|
421
|
-
line_height_iou_threshold (float): The threshold for determining whether two boxes belong to
|
422
|
-
the same line based on their vertical overlap. Defaults to 0.7.
|
340
|
+
bboxes (list): A list of bounding boxes represented as lists of four integers [x1, y1, x2, y2].
|
423
341
|
|
424
342
|
Returns:
|
425
|
-
|
426
|
-
and grouped into lines and blocks.
|
343
|
+
list: The minimum enclosing bounding box represented as a list of four integers [x1, y1, x2, y2].
|
427
344
|
"""
|
428
|
-
|
429
|
-
|
430
|
-
), "OCR results must contain 'boxes' and 'rec_texts'"
|
345
|
+
if not bboxes:
|
346
|
+
raise ValueError("The list of bounding boxes is empty.")
|
431
347
|
|
432
|
-
boxes
|
433
|
-
|
434
|
-
rec_labels = ocr_res["rec_labels"]
|
348
|
+
# Convert the list of bounding boxes to a NumPy array
|
349
|
+
bboxes_array = np.array(bboxes)
|
435
350
|
|
436
|
-
|
437
|
-
|
438
|
-
|
351
|
+
# Compute the minimum and maximum values along the respective axes
|
352
|
+
min_x = np.min(bboxes_array[:, 0])
|
353
|
+
min_y = np.min(bboxes_array[:, 1])
|
354
|
+
max_x = np.max(bboxes_array[:, 2])
|
355
|
+
max_y = np.max(bboxes_array[:, 3])
|
439
356
|
|
440
|
-
|
357
|
+
# Return the minimum enclosing bounding box
|
358
|
+
return [min_x, min_y, max_x, max_y]
|
441
359
|
|
442
|
-
spans.sort(key=lambda span: span[0][1])
|
443
|
-
spans = [list(span) for span in spans]
|
444
|
-
|
445
|
-
lines = []
|
446
|
-
current_line = [spans[0]]
|
447
|
-
current_y0, current_y1 = spans[0][0][1], spans[0][0][3]
|
448
360
|
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
(0, current_y0, 0, current_y1),
|
453
|
-
(0, y0, 0, y1),
|
454
|
-
line_height_iou_threshold,
|
455
|
-
):
|
456
|
-
current_line.append(span)
|
457
|
-
current_y0 = min(current_y0, y0)
|
458
|
-
current_y1 = max(current_y1, y1)
|
459
|
-
else:
|
460
|
-
lines.append(current_line)
|
461
|
-
current_line = [span]
|
462
|
-
current_y0, current_y1 = y0, y1
|
463
|
-
|
464
|
-
if current_line:
|
465
|
-
lines.append(current_line)
|
466
|
-
|
467
|
-
new_lines = []
|
468
|
-
for line in lines:
|
469
|
-
line.sort(key=lambda span: span[0][0])
|
470
|
-
|
471
|
-
ocr_labels = [span[2] for span in line]
|
472
|
-
if "formula" in ocr_labels:
|
473
|
-
line = _sort_line_by_x_projection(input_img, general_ocr_pipeline, line)
|
474
|
-
if label == "reference":
|
475
|
-
line = _format_line(line, inline_x_min, inline_x_max, is_reference=True)
|
476
|
-
elif label != "content":
|
477
|
-
line = _format_line(line, x_min, x_max)
|
478
|
-
new_lines.append(line)
|
479
|
-
|
480
|
-
ocr_res["boxes"] = [span[0] for line in new_lines for span in line]
|
481
|
-
if label == "content":
|
482
|
-
ocr_res["rec_texts"] = [
|
483
|
-
"".join(f"{span[1]} " for span in line).rstrip() for line in new_lines
|
484
|
-
]
|
485
|
-
else:
|
486
|
-
ocr_res["rec_texts"] = [span[1] + " " for line in new_lines for span in line]
|
487
|
-
return ocr_res, len(new_lines)
|
488
|
-
|
489
|
-
|
490
|
-
def _process_text(input_text: str) -> str:
|
361
|
+
def calculate_text_orientation(
|
362
|
+
bboxes: List[List[int]], orientation_ratio: float = 1.5
|
363
|
+
) -> bool:
|
491
364
|
"""
|
492
|
-
|
493
|
-
|
494
|
-
The function removes multiple consecutive spaces between Chinese characters and ensures that
|
495
|
-
only a single space is retained between Chinese and non-Chinese characters.
|
365
|
+
Calculate the orientation of the text based on the bounding boxes.
|
496
366
|
|
497
367
|
Args:
|
498
|
-
|
368
|
+
bboxes (list): A list of bounding boxes.
|
369
|
+
orientation_ratio (float): Ratio for determining orientation. Default is 1.5.
|
499
370
|
|
500
371
|
Returns:
|
501
|
-
str:
|
372
|
+
str: "horizontal" or "vertical".
|
502
373
|
"""
|
503
374
|
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
Args:
|
510
|
-
text (str): The text to handle spaces for.
|
511
|
-
|
512
|
-
Returns:
|
513
|
-
str: The text with properly formatted spaces.
|
514
|
-
"""
|
515
|
-
spaces = re.finditer(r"\s+", text)
|
516
|
-
processed_text = list(text)
|
517
|
-
|
518
|
-
for space in reversed(list(spaces)):
|
519
|
-
start, end = space.span()
|
520
|
-
prev_char = processed_text[start - 1] if start > 0 else ""
|
521
|
-
next_char = processed_text[end] if end < len(processed_text) else ""
|
522
|
-
|
523
|
-
is_prev_chinese = (
|
524
|
-
re.match(r"[\u4e00-\u9fff]", prev_char) if prev_char else False
|
525
|
-
)
|
526
|
-
is_next_chinese = (
|
527
|
-
re.match(r"[\u4e00-\u9fff]", next_char) if next_char else False
|
375
|
+
horizontal_box_num = 0
|
376
|
+
for bbox in bboxes:
|
377
|
+
if len(bbox) != 4:
|
378
|
+
raise ValueError(
|
379
|
+
"Invalid bounding box format. Expected a list of length 4."
|
528
380
|
)
|
381
|
+
x1, y1, x2, y2 = bbox
|
382
|
+
width = x2 - x1
|
383
|
+
height = y2 - y1
|
384
|
+
horizontal_box_num += 1 if width * orientation_ratio >= height else 0
|
529
385
|
|
530
|
-
|
531
|
-
processed_text[start:end] = []
|
532
|
-
else:
|
533
|
-
processed_text[start:end] = [" "]
|
386
|
+
return "horizontal" if horizontal_box_num >= len(bboxes) * 0.5 else "vertical"
|
534
387
|
|
535
|
-
return "".join(processed_text)
|
536
388
|
|
537
|
-
|
389
|
+
def is_english_letter(char):
|
390
|
+
return bool(re.match(r"^[A-Za-z]$", char))
|
538
391
|
|
539
|
-
final_text = re.sub(r"\s+", " ", text_without_spaces).strip()
|
540
|
-
return final_text
|
541
392
|
|
393
|
+
def is_numeric(char):
|
394
|
+
return bool(re.match(r"^[\d.]+$", char))
|
542
395
|
|
543
|
-
|
544
|
-
|
545
|
-
overall_ocr_res: OCRResult,
|
546
|
-
layout_det_res: DetResult,
|
547
|
-
table_res_list: list,
|
548
|
-
seal_res_list: list,
|
549
|
-
) -> OCRResult:
|
396
|
+
|
397
|
+
def is_non_breaking_punctuation(char):
|
550
398
|
"""
|
551
|
-
|
399
|
+
判断一个字符是否是不需要换行的标点符号,包括全角和半角的符号。
|
552
400
|
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
401
|
+
:param char: str, 单个字符
|
402
|
+
:return: bool, 如果字符是不需要换行的标点符号,返回True,否则返回False
|
403
|
+
"""
|
404
|
+
non_breaking_punctuations = {
|
405
|
+
",", # 半角逗号
|
406
|
+
",", # 全角逗号
|
407
|
+
"、", # 顿号
|
408
|
+
";", # 半角分号
|
409
|
+
";", # 全角分号
|
410
|
+
":", # 半角冒号
|
411
|
+
":", # 全角冒号
|
412
|
+
"-", # 连字符
|
413
|
+
}
|
558
414
|
|
559
|
-
|
560
|
-
- "boxes": A list of dictionaries with keys "coordinate" for box coordinates and "block_label" for the type of content.
|
415
|
+
return char in non_breaking_punctuations
|
561
416
|
|
562
|
-
table_res_list (list): A list of table detection results, where each item is a dictionary containing:
|
563
|
-
- "block_bbox": The bounding box of the table layout.
|
564
|
-
- "pred_html": The predicted HTML representation of the table.
|
565
417
|
|
566
|
-
|
418
|
+
def format_line(
|
419
|
+
line: List[List[Union[List[int], str]]],
|
420
|
+
text_direction: int,
|
421
|
+
block_width: int,
|
422
|
+
block_start_coordinate: int,
|
423
|
+
block_stop_coordinate: int,
|
424
|
+
line_gap_limit: int = 10,
|
425
|
+
block_label: str = "text",
|
426
|
+
) -> None:
|
427
|
+
"""
|
428
|
+
Format a line of text spans based on layout constraints.
|
567
429
|
|
430
|
+
Args:
|
431
|
+
line (list): A list of spans, where each span is a list containing a bounding box and text.
|
432
|
+
block_left_coordinate (int): The text line directional minimum coordinate of the layout bounding box.
|
433
|
+
block_stop_coordinate (int): The text line directional maximum x-coordinate of the layout bounding box.
|
434
|
+
first_line_span_limit (int): The limit for the number of pixels before the first span that should be considered part of the first line. Default is 10.
|
435
|
+
line_gap_limit (int): The limit for the number of pixels after the last span that should be considered part of the last line. Default is 10.
|
436
|
+
block_label (str): The label associated with the entire block. Default is 'text'.
|
568
437
|
Returns:
|
569
|
-
|
570
|
-
- "block_label": The label of the content (e.g., 'table', 'chart', 'image').
|
571
|
-
- The label as a key with either table HTML or image data and text.
|
572
|
-
- "block_bbox": The coordinates of the layout box.
|
438
|
+
None: The function modifies the line in place.
|
573
439
|
"""
|
440
|
+
first_span_box = line[0][0]
|
441
|
+
last_span_box = line[-1][0]
|
574
442
|
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
layout_det_res_list, _ = _remove_overlap_blocks(
|
583
|
-
deepcopy(layout_det_res["boxes"]),
|
584
|
-
threshold=0.5,
|
585
|
-
smaller=True,
|
586
|
-
)
|
587
|
-
|
588
|
-
for box_idx, box_info in enumerate(layout_det_res_list):
|
589
|
-
block_bbox = box_info["coordinate"]
|
590
|
-
label = box_info["label"]
|
591
|
-
rec_res = {"boxes": [], "rec_texts": [], "rec_labels": [], "flag": False}
|
592
|
-
seg_start_coordinate = float("inf")
|
593
|
-
seg_end_coordinate = float("-inf")
|
594
|
-
num_of_lines = 1
|
595
|
-
|
596
|
-
if label == "doc_title":
|
597
|
-
with_doc_title = True
|
598
|
-
elif label == "paragraph_title":
|
599
|
-
paragraph_title_indexs.append(box_idx)
|
600
|
-
|
601
|
-
block_area = (block_bbox[2] - block_bbox[0]) * (block_bbox[3] - block_bbox[1])
|
602
|
-
max_block_area = max(max_block_area, block_area)
|
603
|
-
|
604
|
-
if label == "table":
|
605
|
-
for table_res in table_res_list:
|
606
|
-
if len(table_res["cell_box_list"]) == 0:
|
607
|
-
continue
|
608
|
-
if (
|
609
|
-
_calculate_overlap_area_div_minbox_area_ratio(
|
610
|
-
block_bbox, table_res["cell_box_list"][0]
|
611
|
-
)
|
612
|
-
> 0.5
|
613
|
-
):
|
614
|
-
single_block_layout_parsing_res.append(
|
615
|
-
{
|
616
|
-
"block_label": label,
|
617
|
-
"block_content": table_res["pred_html"],
|
618
|
-
"block_bbox": block_bbox,
|
619
|
-
},
|
620
|
-
)
|
621
|
-
break
|
622
|
-
elif label == "seal":
|
623
|
-
if len(seal_res_list) > 0:
|
624
|
-
single_block_layout_parsing_res.append(
|
625
|
-
{
|
626
|
-
"block_label": label,
|
627
|
-
"block_content": _process_text(
|
628
|
-
", ".join(seal_res_list[seal_index]["rec_texts"])
|
629
|
-
),
|
630
|
-
"block_bbox": block_bbox,
|
631
|
-
},
|
632
|
-
)
|
633
|
-
seal_index += 1
|
634
|
-
else:
|
635
|
-
overall_text_boxes = overall_ocr_res["rec_boxes"]
|
636
|
-
for box_no in range(len(overall_text_boxes)):
|
637
|
-
if (
|
638
|
-
_calculate_overlap_area_div_minbox_area_ratio(
|
639
|
-
block_bbox, overall_text_boxes[box_no]
|
640
|
-
)
|
641
|
-
> 0.5
|
642
|
-
):
|
643
|
-
rec_res["boxes"].append(overall_text_boxes[box_no])
|
644
|
-
rec_res["rec_texts"].append(
|
645
|
-
overall_ocr_res["rec_texts"][box_no],
|
646
|
-
)
|
647
|
-
rec_res["rec_labels"].append(
|
648
|
-
overall_ocr_res["rec_labels"][box_no],
|
649
|
-
)
|
650
|
-
rec_res["flag"] = True
|
651
|
-
|
652
|
-
if rec_res["flag"]:
|
653
|
-
rec_res, num_of_lines = _sort_ocr_res_by_y_projection(
|
654
|
-
input_img, general_ocr_pipeline, label, block_bbox, rec_res, 0.7
|
655
|
-
)
|
656
|
-
seg_start_coordinate = rec_res["boxes"][0][0]
|
657
|
-
seg_end_coordinate = rec_res["boxes"][-1][2]
|
658
|
-
if label == "formula":
|
659
|
-
rec_res["rec_texts"] = [
|
660
|
-
rec_res_text.replace("$", "")
|
661
|
-
for rec_res_text in rec_res["rec_texts"]
|
662
|
-
]
|
663
|
-
|
664
|
-
if label in ["chart", "image"]:
|
665
|
-
x_min, y_min, x_max, y_max = list(map(int, block_bbox))
|
666
|
-
img_path = f"imgs/img_in_table_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
|
667
|
-
img = Image.fromarray(input_img[y_min:y_max, x_min:x_max, ::-1])
|
668
|
-
single_block_layout_parsing_res.append(
|
669
|
-
{
|
670
|
-
"block_label": label,
|
671
|
-
"block_content": _process_text("".join(rec_res["rec_texts"])),
|
672
|
-
"block_image": {img_path: img},
|
673
|
-
"block_bbox": block_bbox,
|
674
|
-
},
|
675
|
-
)
|
676
|
-
else:
|
677
|
-
if label in ["doc_title"]:
|
678
|
-
content = " ".join(rec_res["rec_texts"])
|
679
|
-
elif label in ["content"]:
|
680
|
-
content = "\n".join(rec_res["rec_texts"])
|
443
|
+
for span in line:
|
444
|
+
if span[2] == "formula" and block_label != "formula":
|
445
|
+
formula_rec = span[1]
|
446
|
+
if not formula_rec.startswith("$") and not formula_rec.endswith("$"):
|
447
|
+
if len(line) > 1:
|
448
|
+
span[1] = f"${span[1]}$"
|
681
449
|
else:
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
450
|
+
span[1] = f"\n${span[1]}$"
|
451
|
+
|
452
|
+
line_text = ""
|
453
|
+
for span in line:
|
454
|
+
_, text, label = span
|
455
|
+
line_text += text
|
456
|
+
if len(text) > 0 and is_english_letter(line_text[-1]) or label == "formula":
|
457
|
+
line_text += " "
|
458
|
+
|
459
|
+
if text_direction == "horizontal":
|
460
|
+
text_start_index = 0
|
461
|
+
text_stop_index = 2
|
462
|
+
else:
|
463
|
+
text_start_index = 1
|
464
|
+
text_stop_index = 3
|
696
465
|
|
466
|
+
need_new_line = False
|
697
467
|
if (
|
698
|
-
|
699
|
-
and
|
700
|
-
and
|
701
|
-
"block_area", 0
|
702
|
-
)
|
703
|
-
> max_block_area * 0.3
|
468
|
+
len(line_text) > 0
|
469
|
+
and not is_english_letter(line_text[-1])
|
470
|
+
and not is_non_breaking_punctuation(line_text[-1])
|
704
471
|
):
|
705
|
-
|
706
|
-
"
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
single_block_layout_parsing_res.append(
|
714
|
-
{
|
715
|
-
"block_label": "text",
|
716
|
-
"block_content": ocr_rec_text,
|
717
|
-
"block_bbox": ocr_rec_box,
|
718
|
-
"seg_start_coordinate": ocr_rec_box[0],
|
719
|
-
"seg_end_coordinate": ocr_rec_box[2],
|
720
|
-
},
|
472
|
+
if (
|
473
|
+
text_direction == "horizontal"
|
474
|
+
and block_stop_coordinate - last_span_box[text_stop_index] > line_gap_limit
|
475
|
+
) or (
|
476
|
+
text_direction == "vertical"
|
477
|
+
and (
|
478
|
+
block_stop_coordinate - last_span_box[text_stop_index] > line_gap_limit
|
479
|
+
or first_span_box[1] - block_start_coordinate > line_gap_limit
|
721
480
|
)
|
481
|
+
):
|
482
|
+
need_new_line = True
|
483
|
+
|
484
|
+
if line_text.endswith("-"):
|
485
|
+
line_text = line_text[:-1]
|
486
|
+
elif (
|
487
|
+
len(line_text) > 0 and is_english_letter(line_text[-1])
|
488
|
+
) or line_text.endswith("$"):
|
489
|
+
line_text += " "
|
490
|
+
elif (
|
491
|
+
len(line_text) > 0
|
492
|
+
and not is_english_letter(line_text[-1])
|
493
|
+
and not is_non_breaking_punctuation(line_text[-1])
|
494
|
+
and not is_numeric(line_text[-1])
|
495
|
+
) or text_direction == "vertical":
|
496
|
+
if block_stop_coordinate - last_span_box[text_stop_index] > block_width * 0.4:
|
497
|
+
line_text += "\n"
|
498
|
+
if (
|
499
|
+
first_span_box[text_start_index] - block_start_coordinate
|
500
|
+
> block_width * 0.4
|
501
|
+
):
|
502
|
+
line_text = "\n" + line_text
|
722
503
|
|
723
|
-
|
724
|
-
single_block_layout_parsing_res,
|
725
|
-
no_mask_labels=[
|
726
|
-
"text",
|
727
|
-
"formula",
|
728
|
-
"algorithm",
|
729
|
-
"reference",
|
730
|
-
"content",
|
731
|
-
"abstract",
|
732
|
-
],
|
733
|
-
)
|
734
|
-
|
735
|
-
return single_block_layout_parsing_res
|
504
|
+
return line_text, need_new_line
|
736
505
|
|
737
506
|
|
738
|
-
def
|
507
|
+
def split_boxes_by_projection(spans: List[List[int]], direction, offset=1e-5):
|
739
508
|
"""
|
740
|
-
|
509
|
+
Check if there is any complete containment in the x-direction
|
510
|
+
between the bounding boxes and split the containing box accordingly.
|
741
511
|
|
742
512
|
Args:
|
743
|
-
|
744
|
-
|
745
|
-
|
513
|
+
spans (list of lists): Each element is a list containing an ndarray of length 4, a text string, and a label.
|
514
|
+
direction: 'horizontal' or 'vertical', indicating whether the spans are arranged horizontally or vertically.
|
515
|
+
offset (float): A small offset value to ensure that the split boxes are not too close to the original boxes.
|
746
516
|
Returns:
|
747
|
-
A
|
748
|
-
"""
|
749
|
-
assert axis in [0, 1]
|
750
|
-
max_length = np.max(boxes[:, axis::2])
|
751
|
-
projection = np.zeros(max_length, dtype=int)
|
752
|
-
|
753
|
-
# Increment projection histogram over the interval defined by each bounding box
|
754
|
-
for start, end in boxes[:, axis::2]:
|
755
|
-
projection[start:end] += 1
|
756
|
-
|
757
|
-
return projection
|
758
|
-
|
759
|
-
|
760
|
-
def _split_projection_profile(arr_values: np.ndarray, min_value: float, min_gap: float):
|
517
|
+
A new list of boxes, including split boxes, with the same `rec_text` and `label` attributes.
|
761
518
|
"""
|
762
|
-
Split the projection profile into segments based on specified thresholds.
|
763
|
-
|
764
|
-
Args:
|
765
|
-
arr_values: 1D array representing the projection profile.
|
766
|
-
min_value: Minimum value threshold to consider a profile segment significant.
|
767
|
-
min_gap: Minimum gap width to consider a separation between segments.
|
768
519
|
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
# Identify indices where the projection exceeds the minimum value
|
773
|
-
significant_indices = np.where(arr_values > min_value)[0]
|
774
|
-
if not len(significant_indices):
|
775
|
-
return
|
776
|
-
|
777
|
-
# Calculate gaps between significant indices
|
778
|
-
index_diffs = significant_indices[1:] - significant_indices[:-1]
|
779
|
-
gap_indices = np.where(index_diffs > min_gap)[0]
|
780
|
-
|
781
|
-
# Determine start and end indices of segments
|
782
|
-
segment_starts = np.insert(
|
783
|
-
significant_indices[gap_indices + 1],
|
784
|
-
0,
|
785
|
-
significant_indices[0],
|
786
|
-
)
|
787
|
-
segment_ends = np.append(
|
788
|
-
significant_indices[gap_indices],
|
789
|
-
significant_indices[-1] + 1,
|
790
|
-
)
|
520
|
+
def is_projection_contained(box_a, box_b, start_idx, end_idx):
|
521
|
+
"""Check if box_a completely contains box_b in the x-direction."""
|
522
|
+
return box_a[start_idx] <= box_b[start_idx] and box_a[end_idx] >= box_b[end_idx]
|
791
523
|
|
792
|
-
|
524
|
+
new_boxes = []
|
525
|
+
if direction == "horizontal":
|
526
|
+
projection_start_index, projection_end_index = 0, 2
|
527
|
+
else:
|
528
|
+
projection_start_index, projection_end_index = 1, 3
|
793
529
|
|
530
|
+
for i in range(len(spans)):
|
531
|
+
span = spans[i]
|
532
|
+
is_split = False
|
533
|
+
for j in range(i, len(spans)):
|
534
|
+
box_b = spans[j][0]
|
535
|
+
box_a, text, label = span
|
536
|
+
if is_projection_contained(
|
537
|
+
box_a, box_b, projection_start_index, projection_end_index
|
538
|
+
):
|
539
|
+
is_split = True
|
540
|
+
# Split box_a based on the x-coordinates of box_b
|
541
|
+
if box_a[projection_start_index] < box_b[projection_start_index]:
|
542
|
+
w = (
|
543
|
+
box_b[projection_start_index]
|
544
|
+
- offset
|
545
|
+
- box_a[projection_start_index]
|
546
|
+
)
|
547
|
+
if w > 1:
|
548
|
+
new_bbox = box_a.copy()
|
549
|
+
new_bbox[projection_end_index] = (
|
550
|
+
box_b[projection_start_index] - offset
|
551
|
+
)
|
552
|
+
new_boxes.append(
|
553
|
+
[
|
554
|
+
np.array(new_bbox),
|
555
|
+
text,
|
556
|
+
label,
|
557
|
+
]
|
558
|
+
)
|
559
|
+
if box_a[projection_end_index] > box_b[projection_end_index]:
|
560
|
+
w = (
|
561
|
+
box_a[projection_end_index]
|
562
|
+
- box_b[projection_end_index]
|
563
|
+
+ offset
|
564
|
+
)
|
565
|
+
if w > 1:
|
566
|
+
box_a[projection_start_index] = (
|
567
|
+
box_b[projection_end_index] + offset
|
568
|
+
)
|
569
|
+
span = [
|
570
|
+
np.array(box_a),
|
571
|
+
text,
|
572
|
+
label,
|
573
|
+
]
|
574
|
+
if j == len(spans) - 1 and is_split:
|
575
|
+
new_boxes.append(span)
|
576
|
+
if not is_split:
|
577
|
+
new_boxes.append(span)
|
794
578
|
|
795
|
-
|
796
|
-
boxes: np.ndarray, indices: List[int], res: List[int], min_gap: int = 1
|
797
|
-
):
|
798
|
-
"""
|
799
|
-
Recursively project and segment bounding boxes, starting with Y-axis and followed by X-axis.
|
579
|
+
return new_boxes
|
800
580
|
|
801
|
-
Args:
|
802
|
-
boxes: A (N, 4) array representing bounding boxes.
|
803
|
-
indices: List of indices indicating the original position of boxes.
|
804
|
-
res: List to store indices of the final segmented bounding boxes.
|
805
|
-
min_gap (int): Minimum gap width to consider a separation between segments on the X-axis. Defaults to 1.
|
806
581
|
|
807
|
-
|
808
|
-
None: This function modifies the `res` list in place.
|
582
|
+
def remove_extra_space(input_text: str) -> str:
|
809
583
|
"""
|
810
|
-
|
811
|
-
indices
|
812
|
-
), "The length of boxes and indices must be the same."
|
813
|
-
|
814
|
-
# Sort by y_min for Y-axis projection
|
815
|
-
y_sorted_indices = boxes[:, 1].argsort()
|
816
|
-
y_sorted_boxes = boxes[y_sorted_indices]
|
817
|
-
y_sorted_indices = np.array(indices)[y_sorted_indices]
|
818
|
-
|
819
|
-
# Perform Y-axis projection
|
820
|
-
y_projection = _projection_by_bboxes(boxes=y_sorted_boxes, axis=1)
|
821
|
-
y_intervals = _split_projection_profile(y_projection, 0, 1)
|
822
|
-
|
823
|
-
if not y_intervals:
|
824
|
-
return
|
825
|
-
|
826
|
-
# Process each segment defined by Y-axis projection
|
827
|
-
for y_start, y_end in zip(*y_intervals):
|
828
|
-
# Select boxes within the current y interval
|
829
|
-
y_interval_indices = (y_start <= y_sorted_boxes[:, 1]) & (
|
830
|
-
y_sorted_boxes[:, 1] < y_end
|
831
|
-
)
|
832
|
-
y_boxes_chunk = y_sorted_boxes[y_interval_indices]
|
833
|
-
y_indices_chunk = y_sorted_indices[y_interval_indices]
|
834
|
-
|
835
|
-
# Sort by x_min for X-axis projection
|
836
|
-
x_sorted_indices = y_boxes_chunk[:, 0].argsort()
|
837
|
-
x_sorted_boxes_chunk = y_boxes_chunk[x_sorted_indices]
|
838
|
-
x_sorted_indices_chunk = y_indices_chunk[x_sorted_indices]
|
839
|
-
|
840
|
-
# Perform X-axis projection
|
841
|
-
x_projection = _projection_by_bboxes(boxes=x_sorted_boxes_chunk, axis=0)
|
842
|
-
x_intervals = _split_projection_profile(x_projection, 0, min_gap)
|
843
|
-
|
844
|
-
if not x_intervals:
|
845
|
-
continue
|
846
|
-
|
847
|
-
# If X-axis cannot be further segmented, add current indices to results
|
848
|
-
if len(x_intervals[0]) == 1:
|
849
|
-
res.extend(x_sorted_indices_chunk)
|
850
|
-
continue
|
851
|
-
|
852
|
-
# Recursively process each segment defined by X-axis projection
|
853
|
-
for x_start, x_end in zip(*x_intervals):
|
854
|
-
x_interval_indices = (x_start <= x_sorted_boxes_chunk[:, 0]) & (
|
855
|
-
x_sorted_boxes_chunk[:, 0] < x_end
|
856
|
-
)
|
857
|
-
_recursive_yx_cut(
|
858
|
-
x_sorted_boxes_chunk[x_interval_indices],
|
859
|
-
x_sorted_indices_chunk[x_interval_indices],
|
860
|
-
res,
|
861
|
-
)
|
862
|
-
|
584
|
+
Process the input text to handle spaces.
|
863
585
|
|
864
|
-
|
865
|
-
|
866
|
-
):
|
867
|
-
"""
|
868
|
-
Recursively performs X-axis projection followed by Y-axis projection to segment bounding boxes.
|
586
|
+
The function removes multiple consecutive spaces between Chinese characters and ensures that
|
587
|
+
only a single space is retained between Chinese and non-Chinese characters.
|
869
588
|
|
870
589
|
Args:
|
871
|
-
|
872
|
-
indices: A list of indices representing the position of boxes in the original data.
|
873
|
-
res: A list to store indices of bounding boxes that meet the criteria.
|
874
|
-
min_gap (int): Minimum gap width to consider a separation between segments on the X-axis. Defaults to 1.
|
590
|
+
input_text (str): The text to be processed.
|
875
591
|
|
876
592
|
Returns:
|
877
|
-
|
593
|
+
str: The processed text with properly formatted spaces.
|
878
594
|
"""
|
879
|
-
# Ensure boxes and indices have the same length
|
880
|
-
assert len(boxes) == len(
|
881
|
-
indices
|
882
|
-
), "The length of boxes and indices must be the same."
|
883
|
-
|
884
|
-
# Sort by x_min to prepare for X-axis projection
|
885
|
-
x_sorted_indices = boxes[:, 0].argsort()
|
886
|
-
x_sorted_boxes = boxes[x_sorted_indices]
|
887
|
-
x_sorted_indices = np.array(indices)[x_sorted_indices]
|
888
|
-
|
889
|
-
# Perform X-axis projection
|
890
|
-
x_projection = _projection_by_bboxes(boxes=x_sorted_boxes, axis=0)
|
891
|
-
x_intervals = _split_projection_profile(x_projection, 0, 1)
|
892
|
-
|
893
|
-
if not x_intervals:
|
894
|
-
return
|
895
|
-
|
896
|
-
# Process each segment defined by X-axis projection
|
897
|
-
for x_start, x_end in zip(*x_intervals):
|
898
|
-
# Select boxes within the current x interval
|
899
|
-
x_interval_indices = (x_start <= x_sorted_boxes[:, 0]) & (
|
900
|
-
x_sorted_boxes[:, 0] < x_end
|
901
|
-
)
|
902
|
-
x_boxes_chunk = x_sorted_boxes[x_interval_indices]
|
903
|
-
x_indices_chunk = x_sorted_indices[x_interval_indices]
|
904
|
-
|
905
|
-
# Sort selected boxes by y_min to prepare for Y-axis projection
|
906
|
-
y_sorted_indices = x_boxes_chunk[:, 1].argsort()
|
907
|
-
y_sorted_boxes_chunk = x_boxes_chunk[y_sorted_indices]
|
908
|
-
y_sorted_indices_chunk = x_indices_chunk[y_sorted_indices]
|
909
|
-
|
910
|
-
# Perform Y-axis projection
|
911
|
-
y_projection = _projection_by_bboxes(boxes=y_sorted_boxes_chunk, axis=1)
|
912
|
-
y_intervals = _split_projection_profile(y_projection, 0, min_gap)
|
913
|
-
|
914
|
-
if not y_intervals:
|
915
|
-
continue
|
916
|
-
|
917
|
-
# If Y-axis cannot be further segmented, add current indices to results
|
918
|
-
if len(y_intervals[0]) == 1:
|
919
|
-
res.extend(y_sorted_indices_chunk)
|
920
|
-
continue
|
921
|
-
|
922
|
-
# Recursively process each segment defined by Y-axis projection
|
923
|
-
for y_start, y_end in zip(*y_intervals):
|
924
|
-
y_interval_indices = (y_start <= y_sorted_boxes_chunk[:, 1]) & (
|
925
|
-
y_sorted_boxes_chunk[:, 1] < y_end
|
926
|
-
)
|
927
|
-
_recursive_xy_cut(
|
928
|
-
y_sorted_boxes_chunk[y_interval_indices],
|
929
|
-
y_sorted_indices_chunk[y_interval_indices],
|
930
|
-
res,
|
931
|
-
)
|
932
595
|
|
596
|
+
# Remove spaces between Chinese characters
|
597
|
+
text_without_spaces = re.sub(
|
598
|
+
r"(?<=[\u4e00-\u9fff])\s+(?=[\u4e00-\u9fff])", "", input_text
|
599
|
+
)
|
933
600
|
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
|
938
|
-
|
939
|
-
|
940
|
-
Sort bounding boxes using recursive XY cut method based on the specified direction.
|
601
|
+
# Ensure single space between Chinese and non-Chinese characters
|
602
|
+
text_with_single_spaces = re.sub(
|
603
|
+
r"(?<=[\u4e00-\u9fff])\s+(?=[^\u4e00-\u9fff])|(?<=[^\u4e00-\u9fff])\s+(?=[\u4e00-\u9fff])",
|
604
|
+
" ",
|
605
|
+
text_without_spaces,
|
606
|
+
)
|
941
607
|
|
942
|
-
|
943
|
-
|
944
|
-
where each box is represented as
|
945
|
-
[x_min, y_min, x_max, y_max].
|
946
|
-
direction (int): Direction for the initial cut. Use 1 for Y-axis first and 0 for X-axis first.
|
947
|
-
Defaults to 0.
|
948
|
-
min_gap (int): Minimum gap width to consider a separation between segments. Defaults to 1.
|
608
|
+
# Reduce any remaining consecutive spaces to a single space
|
609
|
+
final_text = re.sub(r"\s+", " ", text_with_single_spaces).strip()
|
949
610
|
|
950
|
-
|
951
|
-
List[int]: A list of indices representing the order of sorted bounding boxes.
|
952
|
-
"""
|
953
|
-
block_bboxes = np.asarray(block_bboxes).astype(int)
|
954
|
-
res = []
|
955
|
-
if direction == 1:
|
956
|
-
_recursive_yx_cut(
|
957
|
-
block_bboxes,
|
958
|
-
np.arange(len(block_bboxes)).tolist(),
|
959
|
-
res,
|
960
|
-
min_gap,
|
961
|
-
)
|
962
|
-
else:
|
963
|
-
_recursive_xy_cut(
|
964
|
-
block_bboxes,
|
965
|
-
np.arange(len(block_bboxes)).tolist(),
|
966
|
-
res,
|
967
|
-
min_gap,
|
968
|
-
)
|
969
|
-
return res
|
611
|
+
return final_text
|
970
612
|
|
971
613
|
|
972
614
|
def gather_imgs(original_img, layout_det_objs):
|
973
615
|
imgs_in_doc = []
|
974
616
|
for det_obj in layout_det_objs:
|
975
|
-
if det_obj["label"] in
|
617
|
+
if det_obj["label"] in BLOCK_LABEL_MAP["image_labels"]:
|
618
|
+
label = det_obj["label"]
|
976
619
|
x_min, y_min, x_max, y_max = list(map(int, det_obj["coordinate"]))
|
977
|
-
img_path = f"imgs/
|
620
|
+
img_path = f"imgs/img_in_{label}_box_{x_min}_{y_min}_{x_max}_{y_max}.jpg"
|
978
621
|
img = Image.fromarray(original_img[y_min:y_max, x_min:x_max, ::-1])
|
979
622
|
imgs_in_doc.append(
|
980
623
|
{
|
@@ -1008,10 +651,10 @@ def _get_minbox_if_overlap_by_ratio(
|
|
1008
651
|
The selected bounding box or None if the overlap ratio is not exceeded.
|
1009
652
|
"""
|
1010
653
|
# Calculate the areas of both bounding boxes
|
1011
|
-
area1 = (bbox1
|
1012
|
-
area2 = (bbox2
|
654
|
+
area1 = caculate_bbox_area(bbox1)
|
655
|
+
area2 = caculate_bbox_area(bbox2)
|
1013
656
|
# Calculate the overlap ratio using a helper function
|
1014
|
-
overlap_ratio =
|
657
|
+
overlap_ratio = calculate_overlap_ratio(bbox1, bbox2, mode="small")
|
1015
658
|
# Check if the overlap ratio exceeds the threshold
|
1016
659
|
if overlap_ratio > ratio:
|
1017
660
|
if (area1 <= area2 and smaller) or (area1 >= area2 and not smaller):
|
@@ -1021,7 +664,7 @@ def _get_minbox_if_overlap_by_ratio(
|
|
1021
664
|
return None
|
1022
665
|
|
1023
666
|
|
1024
|
-
def
|
667
|
+
def remove_overlap_blocks(
|
1025
668
|
blocks: List[Dict[str, List[int]]], threshold: float = 0.65, smaller: bool = True
|
1026
669
|
) -> Tuple[List[Dict[str, List[int]]], List[Dict[str, List[int]]]]:
|
1027
670
|
"""
|
@@ -1036,13 +679,13 @@ def _remove_overlap_blocks(
|
|
1036
679
|
Tuple[List[Dict[str, List[int]]], List[Dict[str, List[int]]]]:
|
1037
680
|
A tuple containing the updated list of blocks and a list of dropped blocks.
|
1038
681
|
"""
|
1039
|
-
dropped_blocks = []
|
1040
682
|
dropped_indexes = set()
|
1041
|
-
|
683
|
+
blocks = deepcopy(blocks)
|
684
|
+
overlap_image_blocks = []
|
1042
685
|
# Iterate over each pair of blocks to find overlaps
|
1043
|
-
for i, block1 in enumerate(blocks):
|
1044
|
-
for j in range(i + 1, len(blocks)):
|
1045
|
-
block2 = blocks[j]
|
686
|
+
for i, block1 in enumerate(blocks["boxes"]):
|
687
|
+
for j in range(i + 1, len(blocks["boxes"])):
|
688
|
+
block2 = blocks["boxes"][j]
|
1046
689
|
# Skip blocks that are already marked for removal
|
1047
690
|
if i in dropped_indexes or j in dropped_indexes:
|
1048
691
|
continue
|
@@ -1054,1332 +697,255 @@ def _remove_overlap_blocks(
|
|
1054
697
|
smaller=smaller,
|
1055
698
|
)
|
1056
699
|
if overlap_box_index is not None:
|
1057
|
-
|
1058
|
-
|
1059
|
-
|
700
|
+
is_block1_image = block1["label"] == "image"
|
701
|
+
is_block2_image = block2["label"] == "image"
|
702
|
+
|
703
|
+
if is_block1_image != is_block2_image:
|
704
|
+
# 如果只有一个块在视觉标签中,删除在视觉标签中的那个块
|
705
|
+
drop_index = i if is_block1_image else j
|
706
|
+
overlap_image_blocks.append(blocks["boxes"][drop_index])
|
1060
707
|
else:
|
1061
|
-
|
708
|
+
# 如果两个块都在或都不在视觉标签中,根据 overlap_box_index 决定删除哪个块
|
709
|
+
drop_index = i if overlap_box_index == 1 else j
|
710
|
+
|
1062
711
|
dropped_indexes.add(drop_index)
|
1063
712
|
|
1064
713
|
# Remove marked blocks from the original list
|
1065
714
|
for index in sorted(dropped_indexes, reverse=True):
|
1066
|
-
|
1067
|
-
del blocks[index]
|
1068
|
-
|
1069
|
-
return blocks, dropped_blocks
|
1070
|
-
|
715
|
+
del blocks["boxes"][index]
|
1071
716
|
|
1072
|
-
|
1073
|
-
"""
|
1074
|
-
Calculate the median width of blocks labeled as "text".
|
1075
|
-
|
1076
|
-
Args:
|
1077
|
-
blocks (List[Dict[str, any]]): List of block dictionaries, each containing a 'block_bbox' and 'label'.
|
1078
|
-
|
1079
|
-
Returns:
|
1080
|
-
float: The median width of text blocks, or infinity if no text blocks are found.
|
1081
|
-
"""
|
1082
|
-
widths = [
|
1083
|
-
block["block_bbox"][2] - block["block_bbox"][0]
|
1084
|
-
for block in blocks
|
1085
|
-
if block.get("block_label") == "text"
|
1086
|
-
]
|
1087
|
-
return np.median(widths) if widths else float("inf")
|
717
|
+
return blocks
|
1088
718
|
|
1089
719
|
|
1090
|
-
def
|
1091
|
-
blocks: List[Dict[str, any]],
|
1092
|
-
median_width: float,
|
1093
|
-
no_mask_labels: List[str],
|
1094
|
-
threshold: float = 0.8,
|
1095
|
-
) -> Tuple[List[Dict[str, any]], bool]:
|
720
|
+
def get_bbox_intersection(bbox1, bbox2, return_format="bbox"):
|
1096
721
|
"""
|
1097
|
-
|
722
|
+
Compute the intersection of two bounding boxes, supporting both 4-coordinate and 8-coordinate formats.
|
1098
723
|
|
1099
724
|
Args:
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1103
|
-
|
725
|
+
bbox1 (tuple): The first bounding box, either in 4-coordinate format (x_min, y_min, x_max, y_max)
|
726
|
+
or 8-coordinate format (x1, y1, x2, y2, x3, y3, x4, y4).
|
727
|
+
bbox2 (tuple): The second bounding box in the same format as bbox1.
|
728
|
+
return_format (str): The format of the output intersection, either 'bbox' or 'poly'.
|
1104
729
|
|
1105
730
|
Returns:
|
1106
|
-
|
1107
|
-
|
1108
|
-
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1131
|
-
|
1132
|
-
|
1133
|
-
|
1134
|
-
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1138
|
-
x_min_j, _, x_max_j, _ = other_block["block_bbox"]
|
1139
|
-
x_match_min, x_match_max = max(
|
1140
|
-
x_min_i,
|
1141
|
-
x_min_j,
|
1142
|
-
), min(x_max_i, x_max_j)
|
1143
|
-
match_block_iou = (x_match_max - x_match_min) / (x_max_j - x_min_j)
|
1144
|
-
|
1145
|
-
if match_block_iou > 0:
|
1146
|
-
cover_count += 1
|
1147
|
-
if match_block_iou > threshold:
|
1148
|
-
cover_with_threshold_count += 1
|
1149
|
-
match_block_with_threshold_indexes.append(
|
1150
|
-
(j, match_block_iou),
|
1151
|
-
)
|
1152
|
-
x_min_i = x_match_max
|
1153
|
-
if x_min_i >= x_max_i:
|
1154
|
-
break
|
1155
|
-
|
1156
|
-
if (
|
1157
|
-
layout_length > median_width * 1.3
|
1158
|
-
and (cover_with_threshold_count >= 2 or cover_count >= 2)
|
1159
|
-
) or layout_length > 0.6 * page_width:
|
1160
|
-
# if layout_length > median_width * 1.3 and (cover_with_threshold_count >= 2):
|
1161
|
-
block["layout"] = "double"
|
1162
|
-
double_label_area += (block["block_bbox"][2] - block["block_bbox"][0]) * (
|
1163
|
-
block["block_bbox"][3] - block["block_bbox"][1]
|
1164
|
-
)
|
1165
|
-
else:
|
1166
|
-
block["layout"] = "single"
|
1167
|
-
check_single_layout[i] = match_block_with_threshold_indexes
|
1168
|
-
|
1169
|
-
# Check single-layout block
|
1170
|
-
for i, single_layout in check_single_layout.items():
|
1171
|
-
if single_layout:
|
1172
|
-
index, match_iou = single_layout[-1]
|
1173
|
-
if match_iou > 0.9 and blocks[index]["layout"] == "double":
|
1174
|
-
blocks[i]["layout"] = "double"
|
1175
|
-
double_label_area += (
|
1176
|
-
blocks[i]["block_bbox"][2] - blocks[i]["block_bbox"][0]
|
1177
|
-
) * (blocks[i]["block_bbox"][3] - blocks[i]["block_bbox"][1])
|
1178
|
-
else:
|
1179
|
-
single_label_area += (
|
1180
|
-
blocks[i]["block_bbox"][2] - blocks[i]["block_bbox"][0]
|
1181
|
-
) * (blocks[i]["block_bbox"][3] - blocks[i]["block_bbox"][1])
|
1182
|
-
|
1183
|
-
return blocks, (double_label_area > single_label_area)
|
1184
|
-
|
1185
|
-
|
1186
|
-
def _get_bbox_direction(input_bbox: List[float], ratio: float = 1.0) -> bool:
|
1187
|
-
"""
|
1188
|
-
Determine if a bounding box is horizontal or vertical.
|
1189
|
-
|
1190
|
-
Args:
|
1191
|
-
input_bbox (List[float]): Bounding box [x_min, y_min, x_max, y_max].
|
1192
|
-
ratio (float): Ratio for determining orientation. Default is 1.0.
|
1193
|
-
|
1194
|
-
Returns:
|
1195
|
-
bool: True if the bounding box is considered horizontal, False if vertical.
|
1196
|
-
"""
|
1197
|
-
width = input_bbox[2] - input_bbox[0]
|
1198
|
-
height = input_bbox[3] - input_bbox[1]
|
1199
|
-
return width * ratio >= height
|
1200
|
-
|
1201
|
-
|
1202
|
-
def _get_projection_iou(
|
1203
|
-
input_bbox: List[float], match_bbox: List[float], is_horizontal: bool = True
|
1204
|
-
) -> float:
|
1205
|
-
"""
|
1206
|
-
Calculate the IoU of lines between two bounding boxes.
|
1207
|
-
|
1208
|
-
Args:
|
1209
|
-
input_bbox (List[float]): First bounding box [x_min, y_min, x_max, y_max].
|
1210
|
-
match_bbox (List[float]): Second bounding box [x_min, y_min, x_max, y_max].
|
1211
|
-
is_horizontal (bool): Whether to compare horizontally or vertically.
|
1212
|
-
|
1213
|
-
Returns:
|
1214
|
-
float: Line IoU. Returns 0 if there is no overlap.
|
1215
|
-
"""
|
1216
|
-
if is_horizontal:
|
1217
|
-
x_match_min = max(input_bbox[0], match_bbox[0])
|
1218
|
-
x_match_max = min(input_bbox[2], match_bbox[2])
|
1219
|
-
overlap = max(0, x_match_max - x_match_min)
|
1220
|
-
input_width = min(input_bbox[2] - input_bbox[0], match_bbox[2] - match_bbox[0])
|
731
|
+
tuple or None: The intersection bounding box in the specified format, or None if there is no intersection.
|
732
|
+
"""
|
733
|
+
bbox1 = np.array(bbox1)
|
734
|
+
bbox2 = np.array(bbox2)
|
735
|
+
# Convert both bounding boxes to rectangles
|
736
|
+
rect1 = bbox1 if len(bbox1.shape) == 1 else convert_points_to_boxes([bbox1])[0]
|
737
|
+
rect2 = bbox2 if len(bbox2.shape) == 1 else convert_points_to_boxes([bbox2])[0]
|
738
|
+
|
739
|
+
# Calculate the intersection rectangle
|
740
|
+
|
741
|
+
x_min_inter = max(rect1[0], rect2[0])
|
742
|
+
y_min_inter = max(rect1[1], rect2[1])
|
743
|
+
x_max_inter = min(rect1[2], rect2[2])
|
744
|
+
y_max_inter = min(rect1[3], rect2[3])
|
745
|
+
|
746
|
+
# Check if there is an intersection
|
747
|
+
if x_min_inter >= x_max_inter or y_min_inter >= y_max_inter:
|
748
|
+
return None
|
749
|
+
|
750
|
+
if return_format == "bbox":
|
751
|
+
return np.array([x_min_inter, y_min_inter, x_max_inter, y_max_inter])
|
752
|
+
elif return_format == "poly":
|
753
|
+
return np.array(
|
754
|
+
[
|
755
|
+
[x_min_inter, y_min_inter],
|
756
|
+
[x_max_inter, y_min_inter],
|
757
|
+
[x_max_inter, y_max_inter],
|
758
|
+
[x_min_inter, y_max_inter],
|
759
|
+
],
|
760
|
+
dtype=np.int16,
|
761
|
+
)
|
1221
762
|
else:
|
1222
|
-
|
1223
|
-
y_match_max = min(input_bbox[3], match_bbox[3])
|
1224
|
-
overlap = max(0, y_match_max - y_match_min)
|
1225
|
-
input_width = min(input_bbox[3] - input_bbox[1], match_bbox[3] - match_bbox[1])
|
1226
|
-
|
1227
|
-
return overlap / input_width if input_width > 0 else 0.0
|
763
|
+
raise ValueError("return_format must be either 'bbox' or 'poly'.")
|
1228
764
|
|
1229
765
|
|
1230
|
-
def
|
1231
|
-
|
1232
|
-
|
766
|
+
def shrink_supplement_region_bbox(
|
767
|
+
supplement_region_bbox,
|
768
|
+
ref_region_bbox,
|
769
|
+
image_width,
|
770
|
+
image_height,
|
771
|
+
block_idxes_set,
|
772
|
+
block_bboxes,
|
773
|
+
) -> List:
|
1233
774
|
"""
|
1234
|
-
|
775
|
+
Shrink the supplement region bbox according to the reference region bbox and match the block bboxes.
|
1235
776
|
|
1236
777
|
Args:
|
1237
|
-
|
1238
|
-
|
778
|
+
supplement_region_bbox (list): The supplement region bbox.
|
779
|
+
ref_region_bbox (list): The reference region bbox.
|
780
|
+
image_width (int): The width of the image.
|
781
|
+
image_height (int): The height of the image.
|
782
|
+
block_idxes_set (set): The indexes of the blocks that intersect with the region bbox.
|
783
|
+
block_bboxes (dict): The dictionary of block bboxes.
|
1239
784
|
|
1240
785
|
Returns:
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1257
|
-
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1265
|
-
max_y = max(block["block_bbox"][3] for block in blocks)
|
1266
|
-
region_bbox = (min_x, min_y, max_x, max_y)
|
1267
|
-
region_x_center = (region_bbox[0] + region_bbox[2]) / 2
|
1268
|
-
region_y_center = (region_bbox[1] + region_bbox[3]) / 2
|
1269
|
-
region_width = region_bbox[2] - region_bbox[0]
|
1270
|
-
region_height = region_bbox[3] - region_bbox[1]
|
1271
|
-
|
1272
|
-
pre_cuts = {}
|
1273
|
-
|
1274
|
-
for i, block1 in enumerate(blocks):
|
1275
|
-
block1.setdefault("title_text", [])
|
1276
|
-
block1.setdefault("sub_title", [])
|
1277
|
-
block1.setdefault("vision_footnote", [])
|
1278
|
-
block1.setdefault("sub_label", block1["block_label"])
|
1279
|
-
|
1280
|
-
if block1["block_label"] not in all_labels:
|
1281
|
-
continue
|
1282
|
-
|
1283
|
-
bbox1 = block1["block_bbox"]
|
1284
|
-
x1, y1, x2, y2 = bbox1
|
1285
|
-
is_horizontal_1 = _get_bbox_direction(block1["block_bbox"])
|
1286
|
-
left_up_title_text_distance = float("inf")
|
1287
|
-
left_up_title_text_index = -1
|
1288
|
-
left_up_title_text_direction = None
|
1289
|
-
right_down_title_text_distance = float("inf")
|
1290
|
-
right_down_title_text_index = -1
|
1291
|
-
right_down_title_text_direction = None
|
1292
|
-
|
1293
|
-
# pre-cuts
|
1294
|
-
# Condition 1: Length is greater than half of the layout region
|
1295
|
-
if is_horizontal_1:
|
1296
|
-
block_length = x2 - x1
|
1297
|
-
required_length = region_width / 2
|
1298
|
-
else:
|
1299
|
-
block_length = y2 - y1
|
1300
|
-
required_length = region_height / 2
|
1301
|
-
if block1["block_label"] in special_pre_cut_labels:
|
1302
|
-
length_condition = True
|
1303
|
-
else:
|
1304
|
-
length_condition = block_length > required_length
|
1305
|
-
|
1306
|
-
# Condition 2: Centered check (must be within ±20 in both horizontal and vertical directions)
|
1307
|
-
block_x_center = (x1 + x2) / 2
|
1308
|
-
block_y_center = (y1 + y2) / 2
|
1309
|
-
tolerance_len = block_length // 5
|
1310
|
-
if block1["block_label"] in special_pre_cut_labels:
|
1311
|
-
tolerance_len = block_length // 10
|
1312
|
-
if is_horizontal_1:
|
1313
|
-
is_centered = abs(block_x_center - region_x_center) <= tolerance_len
|
1314
|
-
else:
|
1315
|
-
is_centered = abs(block_y_center - region_y_center) <= tolerance_len
|
1316
|
-
|
1317
|
-
# Condition 3: Check for surrounding text
|
1318
|
-
has_left_text = False
|
1319
|
-
has_right_text = False
|
1320
|
-
has_above_text = False
|
1321
|
-
has_below_text = False
|
1322
|
-
for block2 in blocks:
|
1323
|
-
if block2["block_label"] != "text":
|
1324
|
-
continue
|
1325
|
-
bbox2 = block2["block_bbox"]
|
1326
|
-
x1_2, y1_2, x2_2, y2_2 = bbox2
|
1327
|
-
if is_horizontal_1:
|
1328
|
-
if x2_2 <= x1 and not (y2_2 <= y1 or y1_2 >= y2):
|
1329
|
-
has_left_text = True
|
1330
|
-
if x1_2 >= x2 and not (y2_2 <= y1 or y1_2 >= y2):
|
1331
|
-
has_right_text = True
|
1332
|
-
else:
|
1333
|
-
if y2_2 <= y1 and not (x2_2 <= x1 or x1_2 >= x2):
|
1334
|
-
has_above_text = True
|
1335
|
-
if y1_2 >= y2 and not (x2_2 <= x1 or x1_2 >= x2):
|
1336
|
-
has_below_text = True
|
1337
|
-
|
1338
|
-
if (is_horizontal_1 and has_left_text and has_right_text) or (
|
1339
|
-
not is_horizontal_1 and has_above_text and has_below_text
|
1340
|
-
):
|
1341
|
-
break
|
1342
|
-
|
1343
|
-
no_text_on_sides = (
|
1344
|
-
not (has_left_text or has_right_text)
|
1345
|
-
if is_horizontal_1
|
1346
|
-
else not (has_above_text or has_below_text)
|
1347
|
-
)
|
1348
|
-
|
1349
|
-
# Add coordinates if all conditions are met
|
1350
|
-
if is_centered and length_condition and no_text_on_sides:
|
1351
|
-
if is_horizontal_1:
|
1352
|
-
pre_cuts.setdefault("y", []).append(y1)
|
1353
|
-
else:
|
1354
|
-
pre_cuts.setdefault("x", []).append(x1)
|
1355
|
-
|
1356
|
-
for j, block2 in enumerate(blocks):
|
1357
|
-
if i == j:
|
1358
|
-
continue
|
1359
|
-
|
1360
|
-
bbox2 = block2["block_bbox"]
|
1361
|
-
x1_prime, y1_prime, x2_prime, y2_prime = bbox2
|
1362
|
-
is_horizontal_2 = _get_bbox_direction(bbox2)
|
1363
|
-
match_block_iou = _get_projection_iou(
|
1364
|
-
bbox2,
|
1365
|
-
bbox1,
|
1366
|
-
is_horizontal_1,
|
786
|
+
list: The new region bbox and the matched block idxes.
|
787
|
+
"""
|
788
|
+
x1, y1, x2, y2 = supplement_region_bbox
|
789
|
+
x1_prime, y1_prime, x2_prime, y2_prime = ref_region_bbox
|
790
|
+
index_conversion_map = {0: 2, 1: 3, 2: 0, 3: 1}
|
791
|
+
edge_distance_list = [
|
792
|
+
(x1_prime - x1) / image_width,
|
793
|
+
(y1_prime - y1) / image_height,
|
794
|
+
(x2 - x2_prime) / image_width,
|
795
|
+
(y2 - y2_prime) / image_height,
|
796
|
+
]
|
797
|
+
edge_distance_list_tmp = edge_distance_list[:]
|
798
|
+
min_distance = min(edge_distance_list)
|
799
|
+
src_index = index_conversion_map[edge_distance_list.index(min_distance)]
|
800
|
+
if len(block_idxes_set) == 0:
|
801
|
+
return supplement_region_bbox, []
|
802
|
+
for _ in range(3):
|
803
|
+
dst_index = index_conversion_map[src_index]
|
804
|
+
tmp_region_bbox = supplement_region_bbox[:]
|
805
|
+
tmp_region_bbox[dst_index] = ref_region_bbox[src_index]
|
806
|
+
iner_block_idxes, split_block_idxes = [], []
|
807
|
+
for block_idx in block_idxes_set:
|
808
|
+
overlap_ratio = calculate_overlap_ratio(
|
809
|
+
tmp_region_bbox, block_bboxes[block_idx], mode="small"
|
1367
810
|
)
|
1368
|
-
|
1369
|
-
|
1370
|
-
if is_horizontal:
|
1371
|
-
if is_left_up:
|
1372
|
-
return (y1 - y2_prime + 2) // 5 + x1_prime / 5000
|
1373
|
-
else:
|
1374
|
-
return (y1_prime - y2 + 2) // 5 + x1_prime / 5000
|
1375
|
-
|
1376
|
-
else:
|
1377
|
-
if is_left_up:
|
1378
|
-
return (x1 - x2_prime + 2) // 5 + y1_prime / 5000
|
1379
|
-
else:
|
1380
|
-
return (x1_prime - x2 + 2) // 5 + y1_prime / 5000
|
1381
|
-
|
1382
|
-
block_iou_threshold = 0.1
|
1383
|
-
if block1["block_label"] in sub_title_labels:
|
1384
|
-
block_iou_threshold = 0.5
|
1385
|
-
|
1386
|
-
if is_horizontal_1:
|
1387
|
-
if match_block_iou >= block_iou_threshold:
|
1388
|
-
left_up_distance = distance_(True, True)
|
1389
|
-
right_down_distance = distance_(True, False)
|
1390
|
-
if (
|
1391
|
-
y2_prime <= y1
|
1392
|
-
and left_up_distance <= left_up_title_text_distance
|
1393
|
-
):
|
1394
|
-
left_up_title_text_distance = left_up_distance
|
1395
|
-
left_up_title_text_index = j
|
1396
|
-
left_up_title_text_direction = is_horizontal_2
|
1397
|
-
elif (
|
1398
|
-
y1_prime > y2
|
1399
|
-
and right_down_distance < right_down_title_text_distance
|
1400
|
-
):
|
1401
|
-
right_down_title_text_distance = right_down_distance
|
1402
|
-
right_down_title_text_index = j
|
1403
|
-
right_down_title_text_direction = is_horizontal_2
|
1404
|
-
else:
|
1405
|
-
if match_block_iou >= block_iou_threshold:
|
1406
|
-
left_up_distance = distance_(False, True)
|
1407
|
-
right_down_distance = distance_(False, False)
|
1408
|
-
if (
|
1409
|
-
x2_prime <= x1
|
1410
|
-
and left_up_distance <= left_up_title_text_distance
|
1411
|
-
):
|
1412
|
-
left_up_title_text_distance = left_up_distance
|
1413
|
-
left_up_title_text_index = j
|
1414
|
-
left_up_title_text_direction = is_horizontal_2
|
1415
|
-
elif (
|
1416
|
-
x1_prime > x2
|
1417
|
-
and right_down_distance < right_down_title_text_distance
|
1418
|
-
):
|
1419
|
-
right_down_title_text_distance = right_down_distance
|
1420
|
-
right_down_title_text_index = j
|
1421
|
-
right_down_title_text_direction = is_horizontal_2
|
1422
|
-
|
1423
|
-
height = bbox1[3] - bbox1[1]
|
1424
|
-
width = bbox1[2] - bbox1[0]
|
1425
|
-
title_text_weight = [0.8, 0.8]
|
1426
|
-
|
1427
|
-
title_text, sub_title, vision_footnote = [], [], []
|
1428
|
-
|
1429
|
-
def get_sub_category_(
|
1430
|
-
title_text_direction,
|
1431
|
-
title_text_index,
|
1432
|
-
label,
|
1433
|
-
is_left_up=True,
|
1434
|
-
):
|
1435
|
-
direction_ = [1, 3] if is_left_up else [2, 4]
|
1436
|
-
if (
|
1437
|
-
title_text_direction == is_horizontal_1
|
1438
|
-
and title_text_index != -1
|
1439
|
-
and (label == "text" or label == "paragraph_title")
|
811
|
+
if overlap_ratio > REGION_SETTINGS.get(
|
812
|
+
"match_block_overlap_ratio_threshold", 0.8
|
1440
813
|
):
|
1441
|
-
|
1442
|
-
|
1443
|
-
|
1444
|
-
width1 = bbox2[2] - bbox2[0]
|
1445
|
-
if label == "text":
|
1446
|
-
if (
|
1447
|
-
_nearest_edge_distance(bbox1, bbox2)[0] <= 15
|
1448
|
-
and block1["block_label"] in vision_labels
|
1449
|
-
and width1 < width
|
1450
|
-
and height1 < 0.5 * height
|
1451
|
-
):
|
1452
|
-
blocks[title_text_index]["sub_label"] = "vision_footnote"
|
1453
|
-
vision_footnote.append(bbox2)
|
1454
|
-
elif (
|
1455
|
-
height1 < height * title_text_weight[0]
|
1456
|
-
and (width1 < width or width1 > 1.5 * width)
|
1457
|
-
and block1["block_label"] in title_labels
|
1458
|
-
):
|
1459
|
-
blocks[title_text_index]["sub_label"] = "title_text"
|
1460
|
-
title_text.append((direction_[0], bbox2))
|
1461
|
-
elif (
|
1462
|
-
label == "paragraph_title"
|
1463
|
-
and block1["block_label"] in sub_title_labels
|
1464
|
-
):
|
1465
|
-
sub_title.append(bbox2)
|
1466
|
-
else:
|
1467
|
-
height1 = bbox2[3] - bbox2[1]
|
1468
|
-
width1 = bbox2[2] - bbox2[0]
|
1469
|
-
if label == "text":
|
1470
|
-
if (
|
1471
|
-
_nearest_edge_distance(bbox1, bbox2)[0] <= 15
|
1472
|
-
and block1["block_label"] in vision_labels
|
1473
|
-
and height1 < height
|
1474
|
-
and width1 < 0.5 * width
|
1475
|
-
):
|
1476
|
-
blocks[title_text_index]["sub_label"] = "vision_footnote"
|
1477
|
-
vision_footnote.append(bbox2)
|
1478
|
-
elif (
|
1479
|
-
width1 < width * title_text_weight[1]
|
1480
|
-
and block1["block_label"] in title_labels
|
1481
|
-
):
|
1482
|
-
blocks[title_text_index]["sub_label"] = "title_text"
|
1483
|
-
title_text.append((direction_[1], bbox2))
|
1484
|
-
elif (
|
1485
|
-
label == "paragraph_title"
|
1486
|
-
and block1["block_label"] in sub_title_labels
|
1487
|
-
):
|
1488
|
-
sub_title.append(bbox2)
|
1489
|
-
|
1490
|
-
if (
|
1491
|
-
is_horizontal_1
|
1492
|
-
and abs(left_up_title_text_distance - right_down_title_text_distance) * 5
|
1493
|
-
> height
|
1494
|
-
) or (
|
1495
|
-
not is_horizontal_1
|
1496
|
-
and abs(left_up_title_text_distance - right_down_title_text_distance) * 5
|
1497
|
-
> width
|
1498
|
-
):
|
1499
|
-
if left_up_title_text_distance < right_down_title_text_distance:
|
1500
|
-
get_sub_category_(
|
1501
|
-
left_up_title_text_direction,
|
1502
|
-
left_up_title_text_index,
|
1503
|
-
blocks[left_up_title_text_index]["block_label"],
|
1504
|
-
True,
|
1505
|
-
)
|
1506
|
-
else:
|
1507
|
-
get_sub_category_(
|
1508
|
-
right_down_title_text_direction,
|
1509
|
-
right_down_title_text_index,
|
1510
|
-
blocks[right_down_title_text_index]["block_label"],
|
1511
|
-
False,
|
1512
|
-
)
|
1513
|
-
else:
|
1514
|
-
get_sub_category_(
|
1515
|
-
left_up_title_text_direction,
|
1516
|
-
left_up_title_text_index,
|
1517
|
-
blocks[left_up_title_text_index]["block_label"],
|
1518
|
-
True,
|
1519
|
-
)
|
1520
|
-
get_sub_category_(
|
1521
|
-
right_down_title_text_direction,
|
1522
|
-
right_down_title_text_index,
|
1523
|
-
blocks[right_down_title_text_index]["block_label"],
|
1524
|
-
False,
|
1525
|
-
)
|
1526
|
-
|
1527
|
-
if block1["block_label"] in title_labels:
|
1528
|
-
if blocks[i].get("title_text") == []:
|
1529
|
-
blocks[i]["title_text"] = title_text
|
1530
|
-
|
1531
|
-
if block1["block_label"] in sub_title_labels:
|
1532
|
-
if blocks[i].get("sub_title") == []:
|
1533
|
-
blocks[i]["sub_title"] = sub_title
|
1534
|
-
|
1535
|
-
if block1["block_label"] in vision_labels:
|
1536
|
-
if blocks[i].get("vision_footnote") == []:
|
1537
|
-
blocks[i]["vision_footnote"] = vision_footnote
|
1538
|
-
|
1539
|
-
return blocks, pre_cuts
|
1540
|
-
|
1541
|
-
|
1542
|
-
def get_layout_ordering(
|
1543
|
-
parsing_res_list: List[Dict[str, Any]],
|
1544
|
-
no_mask_labels: List[str] = [],
|
1545
|
-
) -> None:
|
1546
|
-
"""
|
1547
|
-
Process layout parsing results to remove overlapping bounding boxes
|
1548
|
-
and assign an ordering index based on their positions.
|
1549
|
-
|
1550
|
-
Modifies:
|
1551
|
-
The 'parsing_res_list' list by adding an 'index' to each block.
|
1552
|
-
|
1553
|
-
Args:
|
1554
|
-
parsing_res_list (List[Dict[str, Any]]): List of block dictionaries with 'block_bbox' and 'block_label'.
|
1555
|
-
no_mask_labels (List[str]): Labels for which overlapping removal is not performed.
|
1556
|
-
"""
|
1557
|
-
title_text_labels = ["doc_title"]
|
1558
|
-
title_labels = ["doc_title", "paragraph_title"]
|
1559
|
-
vision_labels = ["image", "table", "seal", "chart", "figure"]
|
1560
|
-
vision_title_labels = ["table_title", "chart_title", "figure_title"]
|
1561
|
-
|
1562
|
-
parsing_res_list, pre_cuts = _get_sub_category(parsing_res_list, title_text_labels)
|
1563
|
-
|
1564
|
-
parsing_res_by_pre_cuts_list = []
|
1565
|
-
if len(pre_cuts) > 0:
|
1566
|
-
block_bboxes = [block["block_bbox"] for block in parsing_res_list]
|
1567
|
-
for axis, cuts in pre_cuts.items():
|
1568
|
-
axis_index = 1 if axis == "y" else 0
|
1569
|
-
|
1570
|
-
max_val = max(bbox[axis_index + 2] for bbox in block_bboxes)
|
1571
|
-
|
1572
|
-
intervals = []
|
1573
|
-
prev = 0
|
1574
|
-
for cut in sorted(cuts):
|
1575
|
-
intervals.append((prev, cut))
|
1576
|
-
prev = cut
|
1577
|
-
intervals.append((prev, max_val))
|
1578
|
-
|
1579
|
-
for start, end in intervals:
|
1580
|
-
mask = [
|
1581
|
-
(bbox[axis_index] >= start) and (bbox[axis_index] < end)
|
1582
|
-
for bbox in block_bboxes
|
1583
|
-
]
|
1584
|
-
parsing_res_by_pre_cuts_list.append(
|
1585
|
-
[parsing_res_list[i] for i, m in enumerate(mask) if m]
|
1586
|
-
)
|
1587
|
-
else:
|
1588
|
-
parsing_res_by_pre_cuts_list = [parsing_res_list]
|
1589
|
-
|
1590
|
-
final_parsing_res_list = []
|
1591
|
-
num_index = 0
|
1592
|
-
num_sub_index = 0
|
1593
|
-
for parsing_res_by_pre_cuts in parsing_res_by_pre_cuts_list:
|
1594
|
-
|
1595
|
-
doc_flag = False
|
1596
|
-
median_width = _get_text_median_width(parsing_res_by_pre_cuts)
|
1597
|
-
parsing_res_by_pre_cuts, projection_direction = _get_layout_property(
|
1598
|
-
parsing_res_by_pre_cuts,
|
1599
|
-
median_width,
|
1600
|
-
no_mask_labels=no_mask_labels,
|
1601
|
-
threshold=0.3,
|
1602
|
-
)
|
1603
|
-
# Convert bounding boxes to float and remove overlaps
|
1604
|
-
(
|
1605
|
-
double_text_blocks,
|
1606
|
-
title_text_blocks,
|
1607
|
-
title_blocks,
|
1608
|
-
vision_blocks,
|
1609
|
-
vision_title_blocks,
|
1610
|
-
vision_footnote_blocks,
|
1611
|
-
other_blocks,
|
1612
|
-
) = ([], [], [], [], [], [], [])
|
1613
|
-
|
1614
|
-
drop_indexes = []
|
1615
|
-
|
1616
|
-
for index, block in enumerate(parsing_res_by_pre_cuts):
|
1617
|
-
label = block["sub_label"]
|
1618
|
-
block["block_bbox"] = list(map(int, block["block_bbox"]))
|
1619
|
-
|
1620
|
-
if label == "doc_title":
|
1621
|
-
doc_flag = True
|
1622
|
-
|
1623
|
-
if label in no_mask_labels:
|
1624
|
-
if block["layout"] == "double":
|
1625
|
-
double_text_blocks.append(block)
|
1626
|
-
drop_indexes.append(index)
|
1627
|
-
elif label == "title_text":
|
1628
|
-
title_text_blocks.append(block)
|
1629
|
-
drop_indexes.append(index)
|
1630
|
-
elif label == "vision_footnote":
|
1631
|
-
vision_footnote_blocks.append(block)
|
1632
|
-
drop_indexes.append(index)
|
1633
|
-
elif label in vision_title_labels:
|
1634
|
-
vision_title_blocks.append(block)
|
1635
|
-
drop_indexes.append(index)
|
1636
|
-
elif label in title_labels:
|
1637
|
-
title_blocks.append(block)
|
1638
|
-
drop_indexes.append(index)
|
1639
|
-
elif label in vision_labels:
|
1640
|
-
vision_blocks.append(block)
|
1641
|
-
drop_indexes.append(index)
|
1642
|
-
else:
|
1643
|
-
other_blocks.append(block)
|
1644
|
-
drop_indexes.append(index)
|
1645
|
-
|
1646
|
-
for index in sorted(drop_indexes, reverse=True):
|
1647
|
-
del parsing_res_by_pre_cuts[index]
|
1648
|
-
|
1649
|
-
if len(parsing_res_by_pre_cuts) > 0:
|
1650
|
-
# single text label
|
1651
|
-
if (
|
1652
|
-
len(double_text_blocks) > len(parsing_res_by_pre_cuts)
|
1653
|
-
or projection_direction
|
814
|
+
iner_block_idxes.append(block_idx)
|
815
|
+
elif overlap_ratio > REGION_SETTINGS.get(
|
816
|
+
"split_block_overlap_ratio_threshold", 0.4
|
1654
817
|
):
|
1655
|
-
|
1656
|
-
|
1657
|
-
|
1658
|
-
|
1659
|
-
|
1660
|
-
|
1661
|
-
|
1662
|
-
|
1663
|
-
|
1664
|
-
|
1665
|
-
|
1666
|
-
|
1667
|
-
|
1668
|
-
|
1669
|
-
|
1670
|
-
|
1671
|
-
|
1672
|
-
|
1673
|
-
|
1674
|
-
|
1675
|
-
|
1676
|
-
|
1677
|
-
|
1678
|
-
|
1679
|
-
|
1680
|
-
block["index"] = num_index + sorted_boxes.index(block["block_bbox"]) + 1
|
1681
|
-
block["sub_index"] = (
|
1682
|
-
num_sub_index + sorted_boxes.index(block["block_bbox"]) + 1
|
1683
|
-
)
|
1684
|
-
|
1685
|
-
def nearest_match_(input_blocks, distance_type="manhattan", is_add_index=True):
|
1686
|
-
for block in input_blocks:
|
1687
|
-
bbox = block["block_bbox"]
|
1688
|
-
min_distance = float("inf")
|
1689
|
-
min_distance_config = [
|
1690
|
-
[float("inf"), float("inf")],
|
1691
|
-
float("inf"),
|
1692
|
-
float("inf"),
|
1693
|
-
] # for double text
|
1694
|
-
nearest_gt_index = 0
|
1695
|
-
for match_block in parsing_res_by_pre_cuts:
|
1696
|
-
match_bbox = match_block["block_bbox"]
|
1697
|
-
if distance_type == "nearest_iou_edge_distance":
|
1698
|
-
distance, min_distance_config = _nearest_iou_edge_distance(
|
1699
|
-
bbox,
|
1700
|
-
match_bbox,
|
1701
|
-
block["sub_label"],
|
1702
|
-
vision_labels=vision_labels,
|
1703
|
-
no_mask_labels=no_mask_labels,
|
1704
|
-
median_width=median_width,
|
1705
|
-
title_labels=title_labels,
|
1706
|
-
title_text=block["title_text"],
|
1707
|
-
sub_title=block["sub_title"],
|
1708
|
-
min_distance_config=min_distance_config,
|
1709
|
-
tolerance_len=10,
|
1710
|
-
)
|
1711
|
-
elif distance_type == "title_text":
|
1712
|
-
if (
|
1713
|
-
match_block["block_label"] in title_labels + ["abstract"]
|
1714
|
-
and match_block["title_text"] != []
|
1715
|
-
):
|
1716
|
-
iou_left_up = _calculate_overlap_area_div_minbox_area_ratio(
|
1717
|
-
bbox,
|
1718
|
-
match_block["title_text"][0][1],
|
1719
|
-
)
|
1720
|
-
iou_right_down = (
|
1721
|
-
_calculate_overlap_area_div_minbox_area_ratio(
|
1722
|
-
bbox,
|
1723
|
-
match_block["title_text"][-1][1],
|
1724
|
-
)
|
1725
|
-
)
|
1726
|
-
iou = 1 - max(iou_left_up, iou_right_down)
|
1727
|
-
distance = _manhattan_distance(bbox, match_bbox) * iou
|
1728
|
-
else:
|
1729
|
-
distance = float("inf")
|
1730
|
-
elif distance_type == "manhattan":
|
1731
|
-
distance = _manhattan_distance(bbox, match_bbox)
|
1732
|
-
elif distance_type == "vision_footnote":
|
1733
|
-
if (
|
1734
|
-
match_block["block_label"] in vision_labels
|
1735
|
-
and match_block["vision_footnote"] != []
|
1736
|
-
):
|
1737
|
-
iou_left_up = _calculate_overlap_area_div_minbox_area_ratio(
|
1738
|
-
bbox,
|
1739
|
-
match_block["vision_footnote"][0],
|
1740
|
-
)
|
1741
|
-
iou_right_down = (
|
1742
|
-
_calculate_overlap_area_div_minbox_area_ratio(
|
1743
|
-
bbox,
|
1744
|
-
match_block["vision_footnote"][-1],
|
1745
|
-
)
|
1746
|
-
)
|
1747
|
-
iou = 1 - max(iou_left_up, iou_right_down)
|
1748
|
-
distance = _manhattan_distance(bbox, match_bbox) * iou
|
1749
|
-
else:
|
1750
|
-
distance = float("inf")
|
1751
|
-
elif distance_type == "vision_body":
|
1752
|
-
if (
|
1753
|
-
match_block["block_label"] in vision_title_labels
|
1754
|
-
and block["vision_footnote"] != []
|
1755
|
-
):
|
1756
|
-
iou_left_up = _calculate_overlap_area_div_minbox_area_ratio(
|
1757
|
-
match_bbox,
|
1758
|
-
block["vision_footnote"][0],
|
1759
|
-
)
|
1760
|
-
iou_right_down = (
|
1761
|
-
_calculate_overlap_area_div_minbox_area_ratio(
|
1762
|
-
match_bbox,
|
1763
|
-
block["vision_footnote"][-1],
|
1764
|
-
)
|
1765
|
-
)
|
1766
|
-
iou = 1 - max(iou_left_up, iou_right_down)
|
1767
|
-
distance = _manhattan_distance(bbox, match_bbox) * iou
|
1768
|
-
else:
|
1769
|
-
distance = float("inf")
|
1770
|
-
# when reference block cross mulitple columns, its order should be after the blocks above it.
|
1771
|
-
elif distance_type == "append":
|
1772
|
-
if match_bbox[3] <= bbox[1]:
|
1773
|
-
distance = -(match_bbox[2] * 10 + match_bbox[3])
|
1774
|
-
else:
|
1775
|
-
distance = float("inf")
|
1776
|
-
else:
|
1777
|
-
raise NotImplementedError
|
1778
|
-
|
1779
|
-
if distance < min_distance:
|
1780
|
-
min_distance = distance
|
1781
|
-
if is_add_index:
|
1782
|
-
nearest_gt_index = match_block.get("index", 999)
|
1783
|
-
else:
|
1784
|
-
nearest_gt_index = match_block.get("sub_index", 999)
|
1785
|
-
|
1786
|
-
if is_add_index:
|
1787
|
-
block["index"] = nearest_gt_index
|
1788
|
-
else:
|
1789
|
-
block["sub_index"] = nearest_gt_index
|
1790
|
-
|
1791
|
-
parsing_res_by_pre_cuts.append(block)
|
1792
|
-
|
1793
|
-
# double text label
|
1794
|
-
double_text_blocks.sort(
|
1795
|
-
key=lambda x: (
|
1796
|
-
x["block_bbox"][1] // 10,
|
1797
|
-
x["block_bbox"][0] // median_width,
|
1798
|
-
x["block_bbox"][1] ** 2 + x["block_bbox"][0] ** 2,
|
1799
|
-
),
|
1800
|
-
)
|
1801
|
-
# filter the reference blocks from all blocks that cross mulitple columns.
|
1802
|
-
# they should be ordered using "append".
|
1803
|
-
double_text_reference_blocks = []
|
1804
|
-
i = 0
|
1805
|
-
while i < len(double_text_blocks):
|
1806
|
-
if double_text_blocks[i]["block_label"] == "reference":
|
1807
|
-
double_text_reference_blocks.append(double_text_blocks.pop(i))
|
1808
|
-
else:
|
1809
|
-
i += 1
|
1810
|
-
nearest_match_(
|
1811
|
-
double_text_blocks,
|
1812
|
-
distance_type="nearest_iou_edge_distance",
|
1813
|
-
)
|
1814
|
-
nearest_match_(
|
1815
|
-
double_text_reference_blocks,
|
1816
|
-
distance_type="append",
|
1817
|
-
)
|
1818
|
-
parsing_res_by_pre_cuts.sort(
|
1819
|
-
key=lambda x: (x["index"], x["block_bbox"][1], x["block_bbox"][0]),
|
1820
|
-
)
|
1821
|
-
|
1822
|
-
for idx, block in enumerate(parsing_res_by_pre_cuts):
|
1823
|
-
block["index"] = num_index + idx + 1
|
1824
|
-
block["sub_index"] = num_sub_index + idx + 1
|
1825
|
-
|
1826
|
-
# title label
|
1827
|
-
title_blocks.sort(
|
1828
|
-
key=lambda x: (
|
1829
|
-
x["block_bbox"][1] // 10,
|
1830
|
-
x["block_bbox"][0] // median_width,
|
1831
|
-
x["block_bbox"][1] ** 2 + x["block_bbox"][0] ** 2,
|
1832
|
-
),
|
1833
|
-
)
|
1834
|
-
nearest_match_(title_blocks, distance_type="nearest_iou_edge_distance")
|
1835
|
-
|
1836
|
-
if doc_flag:
|
1837
|
-
text_sort_labels = ["doc_title"]
|
1838
|
-
text_label_priority = {
|
1839
|
-
label: priority for priority, label in enumerate(text_sort_labels)
|
1840
|
-
}
|
1841
|
-
doc_titles = []
|
1842
|
-
for i, block in enumerate(parsing_res_by_pre_cuts):
|
1843
|
-
if block["block_label"] == "doc_title":
|
1844
|
-
doc_titles.append(
|
1845
|
-
(i, block["block_bbox"][1], block["block_bbox"][0]),
|
818
|
+
split_block_idxes.append(block_idx)
|
819
|
+
|
820
|
+
if len(iner_block_idxes) > 0:
|
821
|
+
if len(split_block_idxes) > 0:
|
822
|
+
for split_block_idx in split_block_idxes:
|
823
|
+
split_block_bbox = block_bboxes[split_block_idx]
|
824
|
+
x1, y1, x2, y2 = tmp_region_bbox
|
825
|
+
x1_prime, y1_prime, x2_prime, y2_prime = split_block_bbox
|
826
|
+
edge_distance_list = [
|
827
|
+
(x1_prime - x1) / image_width,
|
828
|
+
(y1_prime - y1) / image_height,
|
829
|
+
(x2 - x2_prime) / image_width,
|
830
|
+
(y2 - y2_prime) / image_height,
|
831
|
+
]
|
832
|
+
max_distance = max(edge_distance_list)
|
833
|
+
src_index = edge_distance_list.index(max_distance)
|
834
|
+
dst_index = index_conversion_map[src_index]
|
835
|
+
tmp_region_bbox[dst_index] = split_block_bbox[src_index]
|
836
|
+
tmp_region_bbox, iner_idxes = shrink_supplement_region_bbox(
|
837
|
+
tmp_region_bbox,
|
838
|
+
ref_region_bbox,
|
839
|
+
image_width,
|
840
|
+
image_height,
|
841
|
+
iner_block_idxes,
|
842
|
+
block_bboxes,
|
1846
843
|
)
|
1847
|
-
|
1848
|
-
|
1849
|
-
|
1850
|
-
|
1851
|
-
|
1852
|
-
x["index"],
|
1853
|
-
text_label_priority.get(x["block_label"], 9999),
|
1854
|
-
x["block_bbox"][1],
|
1855
|
-
x["block_bbox"][0],
|
1856
|
-
),
|
1857
|
-
)
|
844
|
+
if len(iner_idxes) == 0:
|
845
|
+
continue
|
846
|
+
matched_bboxes = [block_bboxes[idx] for idx in iner_block_idxes]
|
847
|
+
supplement_region_bbox = calculate_minimum_enclosing_bbox(matched_bboxes)
|
848
|
+
break
|
1858
849
|
else:
|
1859
|
-
|
1860
|
-
|
1861
|
-
|
1862
|
-
|
1863
|
-
|
1864
|
-
|
1865
|
-
)
|
1866
|
-
|
1867
|
-
for idx, block in enumerate(parsing_res_by_pre_cuts):
|
1868
|
-
block["index"] = num_index + idx + 1
|
1869
|
-
block["sub_index"] = num_sub_index + idx + 1
|
1870
|
-
|
1871
|
-
# title-text label
|
1872
|
-
nearest_match_(title_text_blocks, distance_type="title_text")
|
1873
|
-
|
1874
|
-
def hor_tb_and_ver_lr(x):
|
1875
|
-
input_bbox = x["block_bbox"]
|
1876
|
-
is_horizontal = _get_bbox_direction(input_bbox)
|
1877
|
-
if is_horizontal:
|
1878
|
-
return input_bbox[1]
|
1879
|
-
else:
|
1880
|
-
return input_bbox[0]
|
1881
|
-
|
1882
|
-
parsing_res_by_pre_cuts.sort(
|
1883
|
-
key=lambda x: (x["index"], hor_tb_and_ver_lr(x)),
|
1884
|
-
)
|
1885
|
-
|
1886
|
-
for idx, block in enumerate(parsing_res_by_pre_cuts):
|
1887
|
-
block["index"] = num_index + idx + 1
|
1888
|
-
block["sub_index"] = num_sub_index + idx + 1
|
1889
|
-
|
1890
|
-
# image,figure,chart,seal label
|
1891
|
-
nearest_match_(
|
1892
|
-
vision_blocks,
|
1893
|
-
distance_type="nearest_iou_edge_distance",
|
1894
|
-
is_add_index=False,
|
1895
|
-
)
|
1896
|
-
parsing_res_by_pre_cuts.sort(
|
1897
|
-
key=lambda x: (
|
1898
|
-
x["sub_index"],
|
1899
|
-
x["block_bbox"][1],
|
1900
|
-
x["block_bbox"][0],
|
1901
|
-
),
|
1902
|
-
)
|
1903
|
-
|
1904
|
-
for idx, block in enumerate(parsing_res_by_pre_cuts):
|
1905
|
-
block["sub_index"] = num_sub_index + idx + 1
|
1906
|
-
|
1907
|
-
# image,figure,chart,seal title label
|
1908
|
-
nearest_match_(
|
1909
|
-
vision_title_blocks,
|
1910
|
-
distance_type="nearest_iou_edge_distance",
|
1911
|
-
is_add_index=False,
|
1912
|
-
)
|
1913
|
-
parsing_res_by_pre_cuts.sort(
|
1914
|
-
key=lambda x: (
|
1915
|
-
x["sub_index"],
|
1916
|
-
x["block_bbox"][1],
|
1917
|
-
x["block_bbox"][0],
|
1918
|
-
),
|
1919
|
-
)
|
1920
|
-
|
1921
|
-
for idx, block in enumerate(parsing_res_by_pre_cuts):
|
1922
|
-
block["sub_index"] = num_sub_index + idx + 1
|
1923
|
-
|
1924
|
-
# vision footnote label
|
1925
|
-
nearest_match_(
|
1926
|
-
vision_footnote_blocks,
|
1927
|
-
distance_type="vision_footnote",
|
1928
|
-
is_add_index=False,
|
1929
|
-
)
|
1930
|
-
text_label_priority = {"vision_footnote": 9999}
|
1931
|
-
parsing_res_by_pre_cuts.sort(
|
1932
|
-
key=lambda x: (
|
1933
|
-
x["sub_index"],
|
1934
|
-
text_label_priority.get(x["sub_label"], 0),
|
1935
|
-
x["block_bbox"][1],
|
1936
|
-
x["block_bbox"][0],
|
1937
|
-
),
|
1938
|
-
)
|
1939
|
-
|
1940
|
-
for idx, block in enumerate(parsing_res_by_pre_cuts):
|
1941
|
-
block["sub_index"] = num_sub_index + idx + 1
|
1942
|
-
|
1943
|
-
# header、footnote、header_image... label
|
1944
|
-
nearest_match_(other_blocks, distance_type="manhattan", is_add_index=False)
|
1945
|
-
|
1946
|
-
# add all parsing result
|
1947
|
-
final_parsing_res_list.extend(parsing_res_by_pre_cuts)
|
1948
|
-
|
1949
|
-
# update num index
|
1950
|
-
num_sub_index += len(parsing_res_by_pre_cuts)
|
1951
|
-
for parsing_res in parsing_res_by_pre_cuts:
|
1952
|
-
if parsing_res.get("index"):
|
1953
|
-
num_index += 1
|
1954
|
-
|
1955
|
-
parsing_res_list = [
|
1956
|
-
{
|
1957
|
-
"block_label": parsing_res["block_label"],
|
1958
|
-
"block_content": parsing_res["block_content"],
|
1959
|
-
"block_bbox": parsing_res["block_bbox"],
|
1960
|
-
"block_image": parsing_res.get("block_image", None),
|
1961
|
-
"sub_label": parsing_res["sub_label"],
|
1962
|
-
"sub_index": parsing_res["sub_index"],
|
1963
|
-
"index": parsing_res.get("index", None),
|
1964
|
-
"seg_start_coordinate": parsing_res.get(
|
1965
|
-
"seg_start_coordinate", float("inf")
|
1966
|
-
),
|
1967
|
-
"seg_end_coordinate": parsing_res.get("seg_end_coordinate", float("-inf")),
|
1968
|
-
"num_of_lines": parsing_res.get("num_of_lines", 1),
|
1969
|
-
}
|
1970
|
-
for parsing_res in final_parsing_res_list
|
1971
|
-
]
|
1972
|
-
|
1973
|
-
return parsing_res_list
|
1974
|
-
|
1975
|
-
|
1976
|
-
def _manhattan_distance(
|
1977
|
-
point1: Tuple[float, float],
|
1978
|
-
point2: Tuple[float, float],
|
1979
|
-
weight_x: float = 1.0,
|
1980
|
-
weight_y: float = 1.0,
|
1981
|
-
) -> float:
|
1982
|
-
"""
|
1983
|
-
Calculate the weighted Manhattan distance between two points.
|
1984
|
-
|
1985
|
-
Args:
|
1986
|
-
point1 (Tuple[float, float]): The first point as (x, y).
|
1987
|
-
point2 (Tuple[float, float]): The second point as (x, y).
|
1988
|
-
weight_x (float): The weight for the x-axis distance. Default is 1.0.
|
1989
|
-
weight_y (float): The weight for the y-axis distance. Default is 1.0.
|
1990
|
-
|
1991
|
-
Returns:
|
1992
|
-
float: The weighted Manhattan distance between the two points.
|
1993
|
-
"""
|
1994
|
-
return weight_x * abs(point1[0] - point2[0]) + weight_y * abs(point1[1] - point2[1])
|
1995
|
-
|
1996
|
-
|
1997
|
-
def _calculate_horizontal_distance(
|
1998
|
-
input_bbox: List[int],
|
1999
|
-
match_bbox: List[int],
|
2000
|
-
height: int,
|
2001
|
-
disperse: int,
|
2002
|
-
title_text: List[Tuple[int, List[int]]],
|
2003
|
-
) -> float:
|
2004
|
-
"""
|
2005
|
-
Calculate the horizontal distance between two bounding boxes, considering title text adjustments.
|
2006
|
-
|
2007
|
-
Args:
|
2008
|
-
input_bbox (List[int]): The bounding box coordinates [x1, y1, x2, y2] of the input object.
|
2009
|
-
match_bbox (List[int]): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
|
2010
|
-
height (int): The height of the input bounding box used for normalization.
|
2011
|
-
disperse (int): The dispersion factor used to normalize the horizontal distance.
|
2012
|
-
title_text (List[Tuple[int, List[int]]]): A list of tuples containing title text information and their bounding box coordinates.
|
2013
|
-
Format: [(position_indicator, [x1, y1, x2, y2]), ...].
|
2014
|
-
|
2015
|
-
Returns:
|
2016
|
-
float: The calculated horizontal distance taking into account the title text adjustments.
|
2017
|
-
"""
|
2018
|
-
x1, y1, x2, y2 = input_bbox
|
2019
|
-
x1_prime, y1_prime, x2_prime, y2_prime = match_bbox
|
2020
|
-
|
2021
|
-
# Determine vertical distance adjustment based on title text
|
2022
|
-
if y2 < y1_prime:
|
2023
|
-
if title_text and title_text[-1][0] == 2:
|
2024
|
-
y2 += title_text[-1][1][3] - title_text[-1][1][1]
|
2025
|
-
vertical_adjustment = (y1_prime - y2) * 0.5
|
2026
|
-
else:
|
2027
|
-
if title_text and title_text[0][0] == 1:
|
2028
|
-
y1 -= title_text[0][1][3] - title_text[0][1][1]
|
2029
|
-
vertical_adjustment = y1 - y2_prime
|
2030
|
-
|
2031
|
-
# Calculate horizontal distance with adjustments
|
2032
|
-
horizontal_distance = (
|
2033
|
-
abs(x2_prime - x1) // disperse
|
2034
|
-
+ vertical_adjustment // height
|
2035
|
-
+ vertical_adjustment / 5000
|
2036
|
-
)
|
2037
|
-
|
2038
|
-
return horizontal_distance
|
2039
|
-
|
850
|
+
edge_distance_list_tmp = [
|
851
|
+
x for x in edge_distance_list_tmp if x != min_distance
|
852
|
+
]
|
853
|
+
min_distance = min(edge_distance_list_tmp)
|
854
|
+
src_index = index_conversion_map[edge_distance_list.index(min_distance)]
|
855
|
+
return supplement_region_bbox, iner_block_idxes
|
2040
856
|
|
2041
|
-
def _calculate_vertical_distance(
|
2042
|
-
input_bbox: List[int],
|
2043
|
-
match_bbox: List[int],
|
2044
|
-
width: int,
|
2045
|
-
disperse: int,
|
2046
|
-
title_text: List[Tuple[int, List[int]]],
|
2047
|
-
) -> float:
|
2048
|
-
"""
|
2049
|
-
Calculate the vertical distance between two bounding boxes, considering title text adjustments.
|
2050
857
|
|
2051
|
-
|
2052
|
-
|
2053
|
-
|
2054
|
-
width (int): The width of the input bounding box used for normalization.
|
2055
|
-
disperse (int): The dispersion factor used to normalize the vertical distance.
|
2056
|
-
title_text (List[Tuple[int, List[int]]]): A list of tuples containing title text information and their bounding box coordinates.
|
2057
|
-
Format: [(position_indicator, [x1, y1, x2, y2]), ...].
|
858
|
+
def update_region_box(bbox, region_box):
|
859
|
+
if region_box is None:
|
860
|
+
return bbox
|
2058
861
|
|
2059
|
-
|
2060
|
-
|
2061
|
-
"""
|
2062
|
-
x1, y1, x2, y2 = input_bbox
|
2063
|
-
x1_prime, y1_prime, x2_prime, y2_prime = match_bbox
|
2064
|
-
|
2065
|
-
# Determine horizontal distance adjustment based on title text
|
2066
|
-
if x1 > x2_prime:
|
2067
|
-
if title_text and title_text[0][0] == 3:
|
2068
|
-
x1 -= title_text[0][1][2] - title_text[0][1][0]
|
2069
|
-
horizontal_adjustment = (x1 - x2_prime) * 0.5
|
2070
|
-
else:
|
2071
|
-
if title_text and title_text[-1][0] == 4:
|
2072
|
-
x2 += title_text[-1][1][2] - title_text[-1][1][0]
|
2073
|
-
horizontal_adjustment = x1_prime - x2
|
2074
|
-
|
2075
|
-
# Calculate vertical distance with adjustments
|
2076
|
-
vertical_distance = (
|
2077
|
-
abs(y2_prime - y1) // disperse
|
2078
|
-
+ horizontal_adjustment // width
|
2079
|
-
+ horizontal_adjustment / 5000
|
2080
|
-
)
|
862
|
+
x1, y1, x2, y2 = bbox
|
863
|
+
x1_region, y1_region, x2_region, y2_region = region_box
|
2081
864
|
|
2082
|
-
|
865
|
+
x1_region = int(min(x1, x1_region))
|
866
|
+
y1_region = int(min(y1, y1_region))
|
867
|
+
x2_region = int(max(x2, x2_region))
|
868
|
+
y2_region = int(max(y2, y2_region))
|
2083
869
|
|
870
|
+
region_box = [x1_region, y1_region, x2_region, y2_region]
|
2084
871
|
|
2085
|
-
|
2086
|
-
input_bbox: List[int],
|
2087
|
-
match_bbox: List[int],
|
2088
|
-
weight: List[float] = [1.0, 1.0, 1.0, 1.0],
|
2089
|
-
label: str = "text",
|
2090
|
-
no_mask_labels: List[str] = [],
|
2091
|
-
min_edge_distance_config: List[float] = [],
|
2092
|
-
tolerance_len: float = 10.0,
|
2093
|
-
) -> Tuple[float, List[float]]:
|
2094
|
-
"""
|
2095
|
-
Calculate the nearest edge distance between two bounding boxes, considering directional weights.
|
872
|
+
return region_box
|
2096
873
|
|
2097
|
-
Args:
|
2098
|
-
input_bbox (list): The bounding box coordinates [x1, y1, x2, y2] of the input object.
|
2099
|
-
match_bbox (list): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
|
2100
|
-
weight (list, optional): Directional weights for the edge distances [left, right, up, down]. Defaults to [1, 1, 1, 1].
|
2101
|
-
label (str, optional): The label/type of the object in the bounding box (e.g., 'text'). Defaults to 'text'.
|
2102
|
-
no_mask_labels (list, optional): Labels for which no masking is applied when calculating edge distances. Defaults to an empty list.
|
2103
|
-
min_edge_distance_config (list, optional): Configuration for minimum edge distances [min_edge_distance_x, min_edge_distance_y].
|
2104
|
-
Defaults to [float('inf'), float('inf')].
|
2105
|
-
tolerance_len (float, optional): The tolerance length for adjusting edge distances. Defaults to 10.
|
2106
874
|
|
2107
|
-
|
2108
|
-
|
2109
|
-
|
2110
|
-
|
2111
|
-
|
2112
|
-
|
2113
|
-
|
2114
|
-
|
2115
|
-
)
|
2116
|
-
if match_bbox_iou > 0 and label not in no_mask_labels:
|
2117
|
-
return 0, [0, 0]
|
2118
|
-
|
2119
|
-
if not min_edge_distance_config:
|
2120
|
-
min_edge_distance_config = [float("inf"), float("inf")]
|
2121
|
-
min_edge_distance_x, min_edge_distance_y = min_edge_distance_config
|
2122
|
-
|
2123
|
-
x1, y1, x2, y2 = input_bbox
|
2124
|
-
x1_prime, y1_prime, x2_prime, y2_prime = match_bbox
|
2125
|
-
|
2126
|
-
direction_num = 0
|
2127
|
-
distance_x = float("inf")
|
2128
|
-
distance_y = float("inf")
|
2129
|
-
distance = [float("inf")] * 4
|
2130
|
-
|
2131
|
-
# input_bbox is to the left of match_bbox
|
2132
|
-
if x2 < x1_prime:
|
2133
|
-
direction_num += 1
|
2134
|
-
distance[0] = x1_prime - x2
|
2135
|
-
if abs(distance[0] - min_edge_distance_x) <= tolerance_len:
|
2136
|
-
distance_x = min_edge_distance_x * weight[0]
|
2137
|
-
else:
|
2138
|
-
distance_x = distance[0] * weight[0]
|
2139
|
-
# input_bbox is to the right of match_bbox
|
2140
|
-
elif x1 > x2_prime:
|
2141
|
-
direction_num += 1
|
2142
|
-
distance[1] = x1 - x2_prime
|
2143
|
-
if abs(distance[1] - min_edge_distance_x) <= tolerance_len:
|
2144
|
-
distance_x = min_edge_distance_x * weight[1]
|
2145
|
-
else:
|
2146
|
-
distance_x = distance[1] * weight[1]
|
2147
|
-
elif match_bbox_iou > 0:
|
2148
|
-
distance[0] = 0
|
2149
|
-
distance_x = 0
|
2150
|
-
|
2151
|
-
# input_bbox is above match_bbox
|
2152
|
-
if y2 < y1_prime:
|
2153
|
-
direction_num += 1
|
2154
|
-
distance[2] = y1_prime - y2
|
2155
|
-
if abs(distance[2] - min_edge_distance_y) <= tolerance_len:
|
2156
|
-
distance_y = min_edge_distance_y * weight[2]
|
2157
|
-
else:
|
2158
|
-
distance_y = distance[2] * weight[2]
|
2159
|
-
if label in no_mask_labels:
|
2160
|
-
distance_y = max(0.1, distance_y) * 10 # for abstract
|
2161
|
-
# input_bbox is below match_bbox
|
2162
|
-
elif y1 > y2_prime:
|
2163
|
-
direction_num += 1
|
2164
|
-
distance[3] = y1 - y2_prime
|
2165
|
-
if abs(distance[3] - min_edge_distance_y) <= tolerance_len:
|
2166
|
-
distance_y = min_edge_distance_y * weight[3]
|
2167
|
-
else:
|
2168
|
-
distance_y = distance[3] * weight[3]
|
2169
|
-
elif match_bbox_iou > 0:
|
2170
|
-
distance[2] = 0
|
2171
|
-
distance_y = 0
|
2172
|
-
|
2173
|
-
if direction_num == 2:
|
2174
|
-
return (distance_x + distance_y), [
|
2175
|
-
min(distance[0], distance[1]),
|
2176
|
-
min(distance[2], distance[3]),
|
875
|
+
def convert_formula_res_to_ocr_format(formula_res_list: List, ocr_res: dict):
|
876
|
+
for formula_res in formula_res_list:
|
877
|
+
x_min, y_min, x_max, y_max = list(map(int, formula_res["dt_polys"]))
|
878
|
+
poly_points = [
|
879
|
+
(x_min, y_min),
|
880
|
+
(x_max, y_min),
|
881
|
+
(x_max, y_max),
|
882
|
+
(x_min, y_max),
|
2177
883
|
]
|
2178
|
-
|
2179
|
-
|
2180
|
-
|
2181
|
-
|
2182
|
-
|
2183
|
-
|
2184
|
-
|
2185
|
-
|
2186
|
-
"""Define weights based on the label and orientation."""
|
2187
|
-
if label == "doc_title":
|
2188
|
-
return (
|
2189
|
-
[1, 0.1, 0.1, 1] if horizontal else [0.2, 0.1, 1, 1]
|
2190
|
-
) # left-down , right-left
|
2191
|
-
elif label in [
|
2192
|
-
"paragraph_title",
|
2193
|
-
"table_title",
|
2194
|
-
"abstract",
|
2195
|
-
"image",
|
2196
|
-
"seal",
|
2197
|
-
"chart",
|
2198
|
-
"figure",
|
2199
|
-
]:
|
2200
|
-
return [1, 1, 0.1, 1] # down
|
2201
|
-
else:
|
2202
|
-
return [1, 1, 1, 0.1] # up
|
2203
|
-
|
2204
|
-
|
2205
|
-
def _nearest_iou_edge_distance(
|
2206
|
-
input_bbox: List[int],
|
2207
|
-
match_bbox: List[int],
|
2208
|
-
label: str,
|
2209
|
-
vision_labels: List[str],
|
2210
|
-
no_mask_labels: List[str],
|
2211
|
-
median_width: int = -1,
|
2212
|
-
title_labels: List[str] = [],
|
2213
|
-
title_text: List[Tuple[int, List[int]]] = [],
|
2214
|
-
sub_title: List[List[int]] = [],
|
2215
|
-
min_distance_config: List[float] = [],
|
2216
|
-
tolerance_len: float = 10.0,
|
2217
|
-
) -> Tuple[float, List[float]]:
|
2218
|
-
"""
|
2219
|
-
Calculate the nearest IOU edge distance between two bounding boxes, considering label types, title adjustments, and minimum distance configurations.
|
2220
|
-
This function computes the edge distance between two bounding boxes while considering their overlap (IOU) and various adjustments based on label types,
|
2221
|
-
title text, and subtitle information. It also applies minimum distance configurations and tolerance adjustments.
|
2222
|
-
|
2223
|
-
Args:
|
2224
|
-
input_bbox (List[int]): The bounding box coordinates [x1, y1, x2, y2] of the input object.
|
2225
|
-
match_bbox (List[int]): The bounding box coordinates [x1', y1', x2', y2'] of the object to match against.
|
2226
|
-
label (str): The label/type of the object in the bounding box (e.g., 'image', 'text', etc.).
|
2227
|
-
vision_labels (List[str]): List of labels for vision-related objects (e.g., images, icons).
|
2228
|
-
no_mask_labels (List[str]): Labels for which no masking is applied when calculating edge distances.
|
2229
|
-
median_width (int, optional): The median width for title dispersion calculation. Defaults to -1.
|
2230
|
-
title_labels (List[str], optional): Labels that indicate the object is a title. Defaults to an empty list.
|
2231
|
-
title_text (List[Tuple[int, List[int]]], optional): Text content associated with title labels, in the format [(position_indicator, [x1, y1, x2, y2]), ...].
|
2232
|
-
sub_title (List[List[int]], optional): List of subtitle bounding boxes to adjust the input_bbox. Defaults to an empty list.
|
2233
|
-
min_distance_config (List[float], optional): Configuration for minimum distances [min_edge_distance_config, up_edge_distances_config, total_distance].
|
2234
|
-
tolerance_len (float, optional): The tolerance length for adjusting edge distances. Defaults to 10.0.
|
2235
|
-
|
2236
|
-
Returns:
|
2237
|
-
Tuple[float, List[float]]: A tuple containing:
|
2238
|
-
- The calculated distance considering IOU and adjustments.
|
2239
|
-
- The updated minimum distance configuration.
|
2240
|
-
"""
|
2241
|
-
|
2242
|
-
x1, y1, x2, y2 = input_bbox
|
2243
|
-
x1_prime, y1_prime, x2_prime, y2_prime = match_bbox
|
2244
|
-
|
2245
|
-
min_edge_distance_config, up_edge_distances_config, total_distance = (
|
2246
|
-
min_distance_config
|
2247
|
-
)
|
2248
|
-
|
2249
|
-
iou_distance = 0
|
2250
|
-
|
2251
|
-
if label in vision_labels:
|
2252
|
-
horizontal1 = horizontal2 = True
|
2253
|
-
else:
|
2254
|
-
horizontal1 = _get_bbox_direction(input_bbox)
|
2255
|
-
horizontal2 = _get_bbox_direction(match_bbox, 3)
|
2256
|
-
|
2257
|
-
if (
|
2258
|
-
horizontal1 != horizontal2
|
2259
|
-
or _get_projection_iou(input_bbox, match_bbox, horizontal1) < 0.01
|
2260
|
-
):
|
2261
|
-
iou_distance = 1
|
2262
|
-
|
2263
|
-
if label == "doc_title":
|
2264
|
-
# Calculate distance for titles
|
2265
|
-
disperse = max(1, median_width)
|
2266
|
-
tolerance_len = max(tolerance_len, disperse)
|
2267
|
-
|
2268
|
-
# Adjust input_bbox based on sub_title
|
2269
|
-
if sub_title:
|
2270
|
-
for sub in sub_title:
|
2271
|
-
x1_, y1_, x2_, y2_ = sub
|
2272
|
-
x1, y1, x2, y2 = (
|
2273
|
-
min(x1, x1_),
|
2274
|
-
min(y1, y1_),
|
2275
|
-
min(x2, x2_),
|
2276
|
-
max(y2, y2_),
|
884
|
+
ocr_res["dt_polys"].append(poly_points)
|
885
|
+
formula_res_text: str = formula_res["rec_formula"]
|
886
|
+
ocr_res["rec_texts"].append(formula_res_text)
|
887
|
+
if ocr_res["rec_boxes"].size == 0:
|
888
|
+
ocr_res["rec_boxes"] = np.array(formula_res["dt_polys"])
|
889
|
+
else:
|
890
|
+
ocr_res["rec_boxes"] = np.vstack(
|
891
|
+
(ocr_res["rec_boxes"], [formula_res["dt_polys"]])
|
2277
892
|
)
|
2278
|
-
|
2279
|
-
|
2280
|
-
|
2281
|
-
|
2282
|
-
|
2283
|
-
|
2284
|
-
|
2285
|
-
|
2286
|
-
|
2287
|
-
|
2288
|
-
|
2289
|
-
|
2290
|
-
|
2291
|
-
|
2292
|
-
|
2293
|
-
|
2294
|
-
|
2295
|
-
|
2296
|
-
|
2297
|
-
|
2298
|
-
|
2299
|
-
|
2300
|
-
|
2301
|
-
|
2302
|
-
|
2303
|
-
|
2304
|
-
|
2305
|
-
|
2306
|
-
|
2307
|
-
|
2308
|
-
|
2309
|
-
|
2310
|
-
|
2311
|
-
|
2312
|
-
|
2313
|
-
|
2314
|
-
|
2315
|
-
|
2316
|
-
|
2317
|
-
|
2318
|
-
|
2319
|
-
|
2320
|
-
|
2321
|
-
|
2322
|
-
|
2323
|
-
|
2324
|
-
|
2325
|
-
|
2326
|
-
|
2327
|
-
|
2328
|
-
|
2329
|
-
|
2330
|
-
|
2331
|
-
|
2332
|
-
|
2333
|
-
|
2334
|
-
|
2335
|
-
+ left_edge_distance * iou_edge_weight[3]
|
2336
|
-
)
|
2337
|
-
|
2338
|
-
# Update minimum distance configuration if a smaller distance is found
|
2339
|
-
if total_distance > distance:
|
2340
|
-
edge_distance_config = [
|
2341
|
-
edge_distance_config[0],
|
2342
|
-
edge_distance_config[1],
|
2343
|
-
]
|
2344
|
-
min_distance_config = [
|
2345
|
-
edge_distance_config,
|
2346
|
-
up_edge_distance,
|
2347
|
-
distance,
|
2348
|
-
]
|
2349
|
-
|
2350
|
-
return distance, min_distance_config
|
2351
|
-
|
2352
|
-
|
2353
|
-
def get_show_color(label: str) -> Tuple:
|
2354
|
-
label_colors = {
|
2355
|
-
# Medium Blue (from 'titles_list')
|
2356
|
-
"paragraph_title": (102, 102, 255, 100),
|
2357
|
-
"doc_title": (255, 248, 220, 100), # Cornsilk
|
2358
|
-
# Light Yellow (from 'tables_caption_list')
|
2359
|
-
"table_title": (255, 255, 102, 100),
|
2360
|
-
# Sky Blue (from 'imgs_caption_list')
|
2361
|
-
"figure_title": (102, 178, 255, 100),
|
2362
|
-
"chart_title": (221, 160, 221, 100), # Plum
|
2363
|
-
"vision_footnote": (144, 238, 144, 100), # Light Green
|
2364
|
-
# Deep Purple (from 'texts_list')
|
2365
|
-
"text": (153, 0, 76, 100),
|
2366
|
-
# Bright Green (from 'interequations_list')
|
2367
|
-
"formula": (0, 255, 0, 100),
|
2368
|
-
"abstract": (255, 239, 213, 100), # Papaya Whip
|
2369
|
-
# Medium Green (from 'lists_list' and 'indexs_list')
|
2370
|
-
"content": (40, 169, 92, 100),
|
2371
|
-
# Neutral Gray (from 'dropped_bbox_list')
|
2372
|
-
"seal": (158, 158, 158, 100),
|
2373
|
-
# Olive Yellow (from 'tables_body_list')
|
2374
|
-
"table": (204, 204, 0, 100),
|
2375
|
-
# Bright Green (from 'imgs_body_list')
|
2376
|
-
"image": (153, 255, 51, 100),
|
2377
|
-
# Bright Green (from 'imgs_body_list')
|
2378
|
-
"figure": (153, 255, 51, 100),
|
2379
|
-
"chart": (216, 191, 216, 100), # Thistle
|
2380
|
-
# Pale Yellow-Green (from 'tables_footnote_list')
|
2381
|
-
"reference": (229, 255, 204, 100),
|
2382
|
-
"algorithm": (255, 250, 240, 100), # Floral White
|
2383
|
-
}
|
893
|
+
ocr_res["rec_labels"].append("formula")
|
894
|
+
ocr_res["rec_polys"].append(poly_points)
|
895
|
+
ocr_res["rec_scores"].append(1)
|
896
|
+
|
897
|
+
|
898
|
+
def caculate_bbox_area(bbox):
    """
    Return the area of an axis-aligned box given as ``[x1, y1, x2, y2]``.

    Each coordinate is converted with ``float`` first. The absolute value of
    the product is returned, so the result is non-negative even when the
    corners are supplied in swapped order.

    Args:
        bbox: Iterable of exactly four values convertible to ``float``.

    Returns:
        float: ``abs((x2 - x1) * (y2 - y1))``.
    """
    left, top, right, bottom = (float(coord) for coord in bbox)
    return abs((right - left) * (bottom - top))
|
902
|
+
|
903
|
+
|
904
|
+
# Lookup tables for get_show_color. They are built once at import time instead
# of on every call. Each value is a 4-tuple; the first three entries match the
# RGB values of the named colours in the comments, and the fourth is
# presumably an alpha/opacity value (always 100 here) -- confirm with the
# drawing code that consumes these tuples.
_ORDER_LABEL_COLORS = {
    "doc_title": (255, 248, 220, 100),  # Cornsilk
    "doc_title_text": (255, 239, 213, 100),  # Papaya Whip
    "paragraph_title": (102, 102, 255, 100),  # Medium Blue
    "sub_paragraph_title": (102, 178, 255, 100),  # Sky Blue
    "vision": (153, 255, 51, 100),  # Bright Green
    "vision_title": (144, 238, 144, 100),  # Light Green
    "vision_footnote": (144, 238, 144, 100),  # Light Green
    "normal_text": (153, 0, 76, 100),
    "cross_layout": (53, 218, 207, 100),  # Turquoise-like
    "cross_reference": (221, 160, 221, 100),  # Plum
}

_LAYOUT_LABEL_COLORS = {
    # Medium Blue (from 'titles_list')
    "paragraph_title": (102, 102, 255, 100),
    "doc_title": (255, 248, 220, 100),  # Cornsilk
    # Light Yellow (from 'tables_caption_list')
    "table_title": (255, 255, 102, 100),
    # Sky Blue (from 'imgs_caption_list')
    "figure_title": (102, 178, 255, 100),
    "chart_title": (221, 160, 221, 100),  # Plum
    "vision_footnote": (144, 238, 144, 100),  # Light Green
    # Deep Purple (from 'texts_list')
    "text": (153, 0, 76, 100),
    # Bright Green (from 'interequations_list')
    "formula": (0, 255, 0, 100),
    "abstract": (255, 239, 213, 100),  # Papaya Whip
    # Medium Green (from 'lists_list' and 'indexs_list')
    "content": (40, 169, 92, 100),
    # Neutral Gray (from 'dropped_bbox_list')
    "seal": (158, 158, 158, 100),
    # Olive Yellow (from 'tables_body_list')
    "table": (204, 204, 0, 100),
    # Bright Green (from 'imgs_body_list')
    "image": (153, 255, 51, 100),
    # Bright Green (from 'imgs_body_list')
    "figure": (153, 255, 51, 100),
    "chart": (216, 191, 216, 100),  # Thistle
    # Pale Yellow-Green (from 'tables_footnote_list')
    "reference": (229, 255, 204, 100),
    "algorithm": (255, 250, 240, 100),  # Floral White
}

# Neutral gray, returned for any label missing from the selected table.
_DEFAULT_SHOW_COLOR = (158, 158, 158, 100)


def get_show_color(label: str, order_label: bool = False) -> Tuple:
    """
    Look up the display colour for a label.

    Args:
        label (str): Label name to look up.
        order_label (bool): When truthy, ``label`` is looked up in the
            order-label table (keys such as ``"normal_text"``, ``"vision"``,
            ``"cross_layout"``); otherwise in the layout-label table (keys
            such as ``"text"``, ``"table"``, ``"image"``). Defaults to False.

    Returns:
        Tuple: The 4-tuple colour registered for ``label`` in the selected
        table, or ``(158, 158, 158, 100)`` when the label is not present.
    """
    palette = _ORDER_LABEL_COLORS if order_label else _LAYOUT_LABEL_COLORS
    return palette.get(label, _DEFAULT_SHOW_COLOR)
|