paddlex 3.0.0rc0__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +17 -34
- paddlex/__main__.py +1 -1
- paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee-2B.yaml +14 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee-7B.yaml +14 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
- paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
- paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
- paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
- paddlex/configs/modules/open_vocabulary_detection/YOLO-Worldv2-L.yaml +13 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
- paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
- paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
- paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
- paddlex/configs/pipelines/OCR.yaml +7 -6
- paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
- paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
- paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
- paddlex/configs/pipelines/anomaly_detection.yaml +1 -1
- paddlex/configs/pipelines/doc_understanding.yaml +9 -0
- paddlex/configs/pipelines/formula_recognition.yaml +2 -2
- paddlex/configs/pipelines/layout_parsing.yaml +3 -2
- paddlex/configs/pipelines/seal_recognition.yaml +1 -0
- paddlex/configs/pipelines/table_recognition.yaml +2 -1
- paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
- paddlex/configs/pipelines/ts_anomaly_detection.yaml +1 -1
- paddlex/configs/pipelines/ts_classification.yaml +1 -1
- paddlex/configs/pipelines/ts_forecast.yaml +1 -1
- paddlex/constants.py +17 -0
- paddlex/engine.py +7 -5
- paddlex/hpip_links.html +23 -11
- paddlex/inference/__init__.py +3 -3
- paddlex/inference/common/__init__.py +1 -1
- paddlex/inference/common/batch_sampler/__init__.py +5 -4
- paddlex/inference/common/batch_sampler/audio_batch_sampler.py +5 -6
- paddlex/inference/common/batch_sampler/base_batch_sampler.py +20 -16
- paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +4 -7
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +87 -0
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +45 -60
- paddlex/inference/common/batch_sampler/ts_batch_sampler.py +9 -10
- paddlex/inference/common/batch_sampler/video_batch_sampler.py +2 -22
- paddlex/inference/common/reader/__init__.py +4 -4
- paddlex/inference/common/reader/audio_reader.py +3 -3
- paddlex/inference/common/reader/det_3d_reader.py +7 -5
- paddlex/inference/common/reader/image_reader.py +16 -12
- paddlex/inference/common/reader/ts_reader.py +3 -2
- paddlex/inference/common/reader/video_reader.py +3 -3
- paddlex/inference/common/result/__init__.py +7 -7
- paddlex/inference/common/result/base_cv_result.py +12 -2
- paddlex/inference/common/result/base_result.py +7 -5
- paddlex/inference/common/result/base_ts_result.py +1 -2
- paddlex/inference/common/result/base_video_result.py +2 -2
- paddlex/inference/common/result/mixin.py +31 -25
- paddlex/inference/models/__init__.py +41 -85
- paddlex/inference/models/anomaly_detection/__init__.py +1 -1
- paddlex/inference/models/anomaly_detection/predictor.py +9 -19
- paddlex/inference/models/anomaly_detection/processors.py +9 -2
- paddlex/inference/models/anomaly_detection/result.py +3 -2
- paddlex/inference/models/base/__init__.py +2 -2
- paddlex/inference/models/base/predictor/__init__.py +1 -2
- paddlex/inference/models/base/predictor/base_predictor.py +278 -39
- paddlex/inference/models/common/__init__.py +6 -15
- paddlex/inference/models/common/static_infer.py +724 -251
- paddlex/inference/models/common/tokenizer/__init__.py +7 -3
- paddlex/inference/models/common/tokenizer/bert_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +609 -0
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +9 -7
- paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +438 -0
- paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +85 -77
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +339 -123
- paddlex/inference/models/common/tokenizer/utils.py +1 -1
- paddlex/inference/models/common/tokenizer/vocab.py +8 -8
- paddlex/inference/models/common/ts/__init__.py +1 -1
- paddlex/inference/models/common/ts/funcs.py +13 -6
- paddlex/inference/models/common/ts/processors.py +14 -5
- paddlex/inference/models/common/vision/__init__.py +3 -3
- paddlex/inference/models/common/vision/funcs.py +17 -12
- paddlex/inference/models/common/vision/processors.py +61 -46
- paddlex/inference/models/common/vlm/__init__.py +13 -0
- paddlex/inference/models/common/vlm/activations.py +189 -0
- paddlex/inference/models/common/vlm/bert_padding.py +127 -0
- paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
- paddlex/inference/models/common/vlm/distributed.py +229 -0
- paddlex/inference/models/common/vlm/flash_attn_utils.py +119 -0
- paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
- paddlex/inference/models/common/vlm/generation/__init__.py +34 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +533 -0
- paddlex/inference/models/common/vlm/generation/logits_process.py +730 -0
- paddlex/inference/models/common/vlm/generation/stopping_criteria.py +106 -0
- paddlex/inference/models/common/vlm/generation/utils.py +2162 -0
- paddlex/inference/models/common/vlm/transformers/__init__.py +16 -0
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +1037 -0
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +408 -0
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +1612 -0
- paddlex/inference/models/common/vlm/transformers/model_utils.py +2014 -0
- paddlex/inference/models/common/vlm/transformers/utils.py +178 -0
- paddlex/inference/models/common/vlm/utils.py +109 -0
- paddlex/inference/models/doc_vlm/__init__.py +15 -0
- paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +17 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +2495 -0
- paddlex/inference/models/doc_vlm/predictor.py +253 -0
- paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +17 -0
- paddlex/inference/models/doc_vlm/processors/common.py +561 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +543 -0
- paddlex/inference/models/doc_vlm/result.py +21 -0
- paddlex/inference/models/face_feature/__init__.py +1 -1
- paddlex/inference/models/face_feature/predictor.py +2 -1
- paddlex/inference/models/formula_recognition/__init__.py +1 -1
- paddlex/inference/models/formula_recognition/predictor.py +18 -28
- paddlex/inference/models/formula_recognition/processors.py +126 -97
- paddlex/inference/models/formula_recognition/result.py +43 -35
- paddlex/inference/models/image_classification/__init__.py +1 -1
- paddlex/inference/models/image_classification/predictor.py +9 -19
- paddlex/inference/models/image_classification/processors.py +4 -2
- paddlex/inference/models/image_classification/result.py +4 -3
- paddlex/inference/models/image_feature/__init__.py +1 -1
- paddlex/inference/models/image_feature/predictor.py +9 -19
- paddlex/inference/models/image_feature/processors.py +7 -5
- paddlex/inference/models/image_feature/result.py +2 -3
- paddlex/inference/models/image_multilabel_classification/__init__.py +1 -1
- paddlex/inference/models/image_multilabel_classification/predictor.py +7 -6
- paddlex/inference/models/image_multilabel_classification/processors.py +6 -2
- paddlex/inference/models/image_multilabel_classification/result.py +4 -3
- paddlex/inference/models/image_unwarping/__init__.py +1 -1
- paddlex/inference/models/image_unwarping/predictor.py +8 -16
- paddlex/inference/models/image_unwarping/processors.py +6 -2
- paddlex/inference/models/image_unwarping/result.py +4 -2
- paddlex/inference/models/instance_segmentation/__init__.py +1 -1
- paddlex/inference/models/instance_segmentation/predictor.py +7 -15
- paddlex/inference/models/instance_segmentation/processors.py +4 -7
- paddlex/inference/models/instance_segmentation/result.py +11 -10
- paddlex/inference/models/keypoint_detection/__init__.py +1 -1
- paddlex/inference/models/keypoint_detection/predictor.py +5 -3
- paddlex/inference/models/keypoint_detection/processors.py +11 -3
- paddlex/inference/models/keypoint_detection/result.py +9 -4
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/predictor.py +15 -26
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/processors.py +26 -14
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/result.py +15 -12
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/visualizer_3d.py +77 -39
- paddlex/inference/models/multilingual_speech_recognition/__init__.py +1 -1
- paddlex/inference/models/multilingual_speech_recognition/predictor.py +11 -15
- paddlex/inference/models/multilingual_speech_recognition/processors.py +45 -53
- paddlex/inference/models/multilingual_speech_recognition/result.py +1 -1
- paddlex/inference/models/object_detection/__init__.py +1 -1
- paddlex/inference/models/object_detection/predictor.py +8 -12
- paddlex/inference/models/object_detection/processors.py +63 -33
- paddlex/inference/models/object_detection/result.py +5 -4
- paddlex/inference/models/object_detection/utils.py +3 -1
- paddlex/inference/models/open_vocabulary_detection/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_detection/predictor.py +31 -14
- paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +3 -2
- paddlex/inference/models/open_vocabulary_detection/processors/common.py +114 -0
- paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +19 -8
- paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py +209 -0
- paddlex/inference/models/open_vocabulary_segmentation/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/predictor.py +6 -13
- paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +12 -12
- paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +11 -9
- paddlex/inference/models/semantic_segmentation/__init__.py +1 -1
- paddlex/inference/models/semantic_segmentation/predictor.py +9 -18
- paddlex/inference/models/semantic_segmentation/processors.py +11 -8
- paddlex/inference/models/semantic_segmentation/result.py +4 -3
- paddlex/inference/models/table_structure_recognition/__init__.py +1 -1
- paddlex/inference/models/table_structure_recognition/predictor.py +8 -18
- paddlex/inference/models/table_structure_recognition/processors.py +23 -29
- paddlex/inference/models/table_structure_recognition/result.py +8 -15
- paddlex/inference/models/text_detection/__init__.py +1 -1
- paddlex/inference/models/text_detection/predictor.py +24 -24
- paddlex/inference/models/text_detection/processors.py +116 -44
- paddlex/inference/models/text_detection/result.py +8 -13
- paddlex/inference/models/text_recognition/__init__.py +1 -1
- paddlex/inference/models/text_recognition/predictor.py +11 -19
- paddlex/inference/models/text_recognition/processors.py +27 -13
- paddlex/inference/models/text_recognition/result.py +3 -2
- paddlex/inference/models/ts_anomaly_detection/__init__.py +1 -1
- paddlex/inference/models/ts_anomaly_detection/predictor.py +12 -17
- paddlex/inference/models/ts_anomaly_detection/processors.py +6 -2
- paddlex/inference/models/ts_anomaly_detection/result.py +21 -10
- paddlex/inference/models/ts_classification/__init__.py +1 -1
- paddlex/inference/models/ts_classification/predictor.py +14 -27
- paddlex/inference/models/ts_classification/processors.py +7 -2
- paddlex/inference/models/ts_classification/result.py +21 -12
- paddlex/inference/models/ts_forecasting/__init__.py +1 -1
- paddlex/inference/models/ts_forecasting/predictor.py +13 -18
- paddlex/inference/models/ts_forecasting/processors.py +12 -3
- paddlex/inference/models/ts_forecasting/result.py +24 -11
- paddlex/inference/models/video_classification/__init__.py +1 -1
- paddlex/inference/models/video_classification/predictor.py +9 -15
- paddlex/inference/models/video_classification/processors.py +24 -24
- paddlex/inference/models/video_classification/result.py +7 -3
- paddlex/inference/models/video_detection/__init__.py +1 -1
- paddlex/inference/models/video_detection/predictor.py +8 -15
- paddlex/inference/models/video_detection/processors.py +24 -11
- paddlex/inference/models/video_detection/result.py +10 -5
- paddlex/inference/pipelines/__init__.py +48 -37
- paddlex/inference/pipelines/_parallel.py +172 -0
- paddlex/inference/pipelines/anomaly_detection/__init__.py +1 -1
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +29 -9
- paddlex/inference/pipelines/attribute_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +24 -9
- paddlex/inference/pipelines/attribute_recognition/result.py +10 -8
- paddlex/inference/pipelines/base.py +43 -13
- paddlex/inference/pipelines/components/__init__.py +14 -8
- paddlex/inference/pipelines/components/chat_server/__init__.py +1 -1
- paddlex/inference/pipelines/components/chat_server/base.py +2 -2
- paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +8 -8
- paddlex/inference/pipelines/components/common/__init__.py +5 -4
- paddlex/inference/pipelines/components/common/base_operator.py +2 -1
- paddlex/inference/pipelines/components/common/base_result.py +3 -2
- paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +1 -2
- paddlex/inference/pipelines/components/common/crop_image_regions.py +11 -5
- paddlex/inference/pipelines/components/common/seal_det_warp.py +44 -13
- paddlex/inference/pipelines/components/common/sort_boxes.py +4 -2
- paddlex/inference/pipelines/components/common/warp_image.py +50 -0
- paddlex/inference/pipelines/components/faisser.py +10 -5
- paddlex/inference/pipelines/components/prompt_engineering/__init__.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/base.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +2 -1
- paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +2 -2
- paddlex/inference/pipelines/components/retriever/__init__.py +2 -2
- paddlex/inference/pipelines/components/retriever/base.py +18 -16
- paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +2 -2
- paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +87 -84
- paddlex/inference/pipelines/components/utils/__init__.py +1 -1
- paddlex/inference/pipelines/components/utils/mixin.py +7 -7
- paddlex/inference/pipelines/doc_preprocessor/__init__.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +70 -51
- paddlex/inference/pipelines/doc_preprocessor/result.py +5 -10
- paddlex/inference/pipelines/doc_understanding/__init__.py +15 -0
- paddlex/inference/pipelines/doc_understanding/pipeline.py +71 -0
- paddlex/inference/pipelines/face_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/face_recognition/pipeline.py +3 -1
- paddlex/inference/pipelines/face_recognition/result.py +3 -2
- paddlex/inference/pipelines/formula_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/formula_recognition/pipeline.py +137 -93
- paddlex/inference/pipelines/formula_recognition/result.py +20 -29
- paddlex/inference/pipelines/image_classification/__init__.py +1 -1
- paddlex/inference/pipelines/image_classification/pipeline.py +30 -11
- paddlex/inference/pipelines/image_multilabel_classification/__init__.py +1 -1
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +31 -12
- paddlex/inference/pipelines/instance_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +30 -9
- paddlex/inference/pipelines/keypoint_detection/__init__.py +1 -1
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/layout_parsing/__init__.py +1 -1
- paddlex/inference/pipelines/layout_parsing/pipeline.py +54 -56
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +904 -261
- paddlex/inference/pipelines/layout_parsing/result.py +9 -21
- paddlex/inference/pipelines/layout_parsing/result_v2.py +525 -250
- paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +570 -2004
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
- paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
- paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/pipeline.py +17 -10
- paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +17 -6
- paddlex/inference/pipelines/object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/object_detection/pipeline.py +29 -9
- paddlex/inference/pipelines/ocr/__init__.py +1 -1
- paddlex/inference/pipelines/ocr/pipeline.py +151 -77
- paddlex/inference/pipelines/ocr/result.py +31 -24
- paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +1 -1
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +17 -6
- paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +17 -6
- paddlex/inference/pipelines/pp_chatocr/__init__.py +1 -1
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +14 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +22 -14
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +34 -16
- paddlex/inference/pipelines/pp_shitu_v2/__init__.py +1 -1
- paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +12 -8
- paddlex/inference/pipelines/pp_shitu_v2/result.py +4 -4
- paddlex/inference/pipelines/rotated_object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/seal_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/seal_recognition/pipeline.py +127 -63
- paddlex/inference/pipelines/seal_recognition/result.py +4 -2
- paddlex/inference/pipelines/semantic_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +30 -9
- paddlex/inference/pipelines/small_object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/small_object_detection/pipeline.py +30 -9
- paddlex/inference/pipelines/table_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/table_recognition/pipeline.py +61 -37
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +668 -65
- paddlex/inference/pipelines/table_recognition/result.py +12 -10
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +12 -8
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +55 -37
- paddlex/inference/pipelines/table_recognition/utils.py +1 -1
- paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +1 -1
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ts_classification/__init__.py +1 -1
- paddlex/inference/pipelines/ts_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/ts_forecasting/__init__.py +1 -1
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +16 -6
- paddlex/inference/pipelines/video_classification/__init__.py +1 -1
- paddlex/inference/pipelines/video_classification/pipeline.py +17 -6
- paddlex/inference/pipelines/video_detection/__init__.py +1 -1
- paddlex/inference/pipelines/video_detection/pipeline.py +20 -7
- paddlex/inference/serving/__init__.py +5 -1
- paddlex/inference/serving/basic_serving/__init__.py +1 -1
- paddlex/inference/serving/basic_serving/_app.py +31 -19
- paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +7 -4
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +12 -4
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +7 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py +153 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +13 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +10 -8
- paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +13 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +17 -14
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +16 -9
- paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +11 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +14 -12
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_server.py +9 -4
- paddlex/inference/serving/infra/__init__.py +1 -1
- paddlex/inference/serving/infra/config.py +1 -1
- paddlex/inference/serving/infra/models.py +13 -6
- paddlex/inference/serving/infra/storage.py +9 -4
- paddlex/inference/serving/infra/utils.py +54 -28
- paddlex/inference/serving/schemas/__init__.py +1 -1
- paddlex/inference/serving/schemas/anomaly_detection.py +1 -1
- paddlex/inference/serving/schemas/doc_preprocessor.py +1 -1
- paddlex/inference/serving/schemas/doc_understanding.py +78 -0
- paddlex/inference/serving/schemas/face_recognition.py +1 -1
- paddlex/inference/serving/schemas/formula_recognition.py +2 -2
- paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -1
- paddlex/inference/serving/schemas/image_classification.py +1 -1
- paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -1
- paddlex/inference/serving/schemas/instance_segmentation.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +2 -3
- paddlex/inference/serving/schemas/m_3d_bev_detection.py +1 -1
- paddlex/inference/serving/schemas/multilingual_speech_recognition.py +1 -1
- paddlex/inference/serving/schemas/object_detection.py +1 -1
- paddlex/inference/serving/schemas/ocr.py +1 -1
- paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -1
- paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -1
- paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -1
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +2 -3
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +3 -3
- paddlex/inference/serving/schemas/pp_shituv2.py +1 -1
- paddlex/inference/serving/schemas/pp_structurev3.py +11 -7
- paddlex/inference/serving/schemas/rotated_object_detection.py +1 -1
- paddlex/inference/serving/schemas/seal_recognition.py +2 -2
- paddlex/inference/serving/schemas/semantic_segmentation.py +1 -1
- paddlex/inference/serving/schemas/shared/__init__.py +1 -1
- paddlex/inference/serving/schemas/shared/classification.py +1 -1
- paddlex/inference/serving/schemas/shared/image_segmentation.py +1 -1
- paddlex/inference/serving/schemas/shared/object_detection.py +1 -1
- paddlex/inference/serving/schemas/shared/ocr.py +1 -1
- paddlex/inference/serving/schemas/small_object_detection.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +3 -7
- paddlex/inference/serving/schemas/table_recognition_v2.py +6 -7
- paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -1
- paddlex/inference/serving/schemas/ts_classification.py +1 -1
- paddlex/inference/serving/schemas/ts_forecast.py +1 -1
- paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -1
- paddlex/inference/serving/schemas/video_classification.py +1 -1
- paddlex/inference/serving/schemas/video_detection.py +1 -1
- paddlex/inference/utils/__init__.py +1 -1
- paddlex/inference/utils/benchmark.py +332 -179
- paddlex/inference/utils/color_map.py +1 -1
- paddlex/inference/utils/get_pipeline_path.py +1 -1
- paddlex/inference/utils/hpi.py +258 -0
- paddlex/inference/utils/hpi_model_info_collection.json +2331 -0
- paddlex/inference/utils/io/__init__.py +11 -11
- paddlex/inference/utils/io/readers.py +31 -27
- paddlex/inference/utils/io/style.py +21 -14
- paddlex/inference/utils/io/tablepyxl.py +13 -5
- paddlex/inference/utils/io/writers.py +9 -10
- paddlex/inference/utils/mkldnn_blocklist.py +25 -0
- paddlex/inference/utils/model_paths.py +48 -0
- paddlex/inference/utils/{new_ir_blacklist.py → new_ir_blocklist.py} +1 -2
- paddlex/inference/utils/official_models.py +278 -262
- paddlex/inference/utils/pp_option.py +184 -92
- paddlex/inference/utils/trt_blocklist.py +43 -0
- paddlex/inference/utils/trt_config.py +420 -0
- paddlex/model.py +30 -12
- paddlex/modules/__init__.py +57 -80
- paddlex/modules/anomaly_detection/__init__.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/__init__.py +2 -3
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +6 -3
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +8 -4
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +7 -4
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +7 -2
- paddlex/modules/anomaly_detection/evaluator.py +3 -3
- paddlex/modules/anomaly_detection/exportor.py +1 -1
- paddlex/modules/anomaly_detection/model_list.py +1 -1
- paddlex/modules/anomaly_detection/trainer.py +3 -4
- paddlex/modules/base/__init__.py +5 -5
- paddlex/modules/base/build_model.py +1 -2
- paddlex/modules/base/dataset_checker/__init__.py +2 -2
- paddlex/modules/base/dataset_checker/dataset_checker.py +4 -4
- paddlex/modules/base/dataset_checker/utils.py +1 -3
- paddlex/modules/base/evaluator.py +13 -13
- paddlex/modules/base/exportor.py +12 -13
- paddlex/modules/base/trainer.py +21 -11
- paddlex/modules/base/utils/__init__.py +13 -0
- paddlex/modules/base/utils/cinn_setting.py +89 -0
- paddlex/modules/base/utils/coco_eval.py +94 -0
- paddlex/modules/base/utils/topk_eval.py +118 -0
- paddlex/modules/doc_vlm/__init__.py +18 -0
- paddlex/modules/doc_vlm/dataset_checker.py +29 -0
- paddlex/modules/doc_vlm/evaluator.py +29 -0
- paddlex/modules/doc_vlm/exportor.py +29 -0
- paddlex/modules/doc_vlm/model_list.py +16 -0
- paddlex/modules/doc_vlm/trainer.py +41 -0
- paddlex/modules/face_recognition/__init__.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/__init__.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +1 -1
- paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +3 -5
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/face_recognition/evaluator.py +3 -3
- paddlex/modules/face_recognition/exportor.py +1 -1
- paddlex/modules/face_recognition/model_list.py +1 -1
- paddlex/modules/face_recognition/trainer.py +1 -1
- paddlex/modules/formula_recognition/__init__.py +2 -2
- paddlex/modules/formula_recognition/dataset_checker/__init__.py +3 -3
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +2 -6
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
- paddlex/modules/formula_recognition/evaluator.py +6 -3
- paddlex/modules/formula_recognition/exportor.py +1 -1
- paddlex/modules/formula_recognition/model_list.py +4 -1
- paddlex/modules/formula_recognition/trainer.py +5 -3
- paddlex/modules/general_recognition/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +7 -9
- paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +4 -5
- paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +6 -5
- paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/general_recognition/evaluator.py +2 -2
- paddlex/modules/general_recognition/exportor.py +1 -1
- paddlex/modules/general_recognition/model_list.py +1 -1
- paddlex/modules/general_recognition/trainer.py +1 -1
- paddlex/modules/image_classification/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
- paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/image_classification/evaluator.py +3 -3
- paddlex/modules/image_classification/exportor.py +1 -1
- paddlex/modules/image_classification/model_list.py +2 -1
- paddlex/modules/image_classification/trainer.py +3 -3
- paddlex/modules/image_unwarping/__init__.py +1 -1
- paddlex/modules/image_unwarping/model_list.py +1 -1
- paddlex/modules/instance_segmentation/__init__.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/__init__.py +2 -3
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +9 -5
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +8 -5
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +8 -8
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +7 -4
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +10 -8
- paddlex/modules/instance_segmentation/evaluator.py +2 -2
- paddlex/modules/instance_segmentation/exportor.py +1 -1
- paddlex/modules/instance_segmentation/model_list.py +1 -1
- paddlex/modules/instance_segmentation/trainer.py +1 -1
- paddlex/modules/keypoint_detection/__init__.py +2 -2
- paddlex/modules/keypoint_detection/dataset_checker/__init__.py +2 -2
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +1 -1
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +8 -3
- paddlex/modules/keypoint_detection/evaluator.py +2 -2
- paddlex/modules/keypoint_detection/exportor.py +1 -1
- paddlex/modules/keypoint_detection/model_list.py +1 -1
- paddlex/modules/keypoint_detection/trainer.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/__init__.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/__init__.py +3 -3
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/analyse_dataset.py +8 -8
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/check_dataset.py +1 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/evaluator.py +3 -3
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/exportor.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/model_list.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/trainer.py +5 -7
- paddlex/modules/multilabel_classification/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +10 -7
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +1 -5
- paddlex/modules/multilabel_classification/evaluator.py +3 -3
- paddlex/modules/multilabel_classification/exportor.py +1 -1
- paddlex/modules/multilabel_classification/model_list.py +1 -1
- paddlex/modules/multilabel_classification/trainer.py +3 -3
- paddlex/modules/multilingual_speech_recognition/__init__.py +2 -2
- paddlex/modules/multilingual_speech_recognition/dataset_checker.py +3 -3
- paddlex/modules/multilingual_speech_recognition/evaluator.py +3 -3
- paddlex/modules/multilingual_speech_recognition/exportor.py +3 -3
- paddlex/modules/multilingual_speech_recognition/model_list.py +1 -1
- paddlex/modules/multilingual_speech_recognition/trainer.py +7 -5
- paddlex/modules/object_detection/__init__.py +2 -2
- paddlex/modules/object_detection/dataset_checker/__init__.py +2 -11
- paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +10 -8
- paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +17 -12
- paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +8 -4
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +9 -8
- paddlex/modules/object_detection/evaluator.py +11 -6
- paddlex/modules/object_detection/exportor.py +1 -1
- paddlex/modules/object_detection/model_list.py +3 -1
- paddlex/modules/object_detection/trainer.py +4 -5
- paddlex/modules/open_vocabulary_detection/__init__.py +2 -2
- paddlex/modules/open_vocabulary_detection/dataset_checker.py +3 -3
- paddlex/modules/open_vocabulary_detection/evaluator.py +3 -3
- paddlex/modules/open_vocabulary_detection/exportor.py +3 -3
- paddlex/modules/open_vocabulary_detection/model_list.py +2 -4
- paddlex/modules/open_vocabulary_detection/trainer.py +7 -5
- paddlex/modules/open_vocabulary_segmentation/__init__.py +2 -2
- paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/evaluator.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/exportor.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/model_list.py +1 -1
- paddlex/modules/open_vocabulary_segmentation/trainer.py +7 -5
- paddlex/modules/semantic_segmentation/__init__.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +2 -3
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +6 -3
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +7 -4
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +6 -2
- paddlex/modules/semantic_segmentation/evaluator.py +3 -3
- paddlex/modules/semantic_segmentation/exportor.py +1 -1
- paddlex/modules/semantic_segmentation/model_list.py +1 -1
- paddlex/modules/semantic_segmentation/trainer.py +3 -4
- paddlex/modules/table_recognition/__init__.py +2 -2
- paddlex/modules/table_recognition/dataset_checker/__init__.py +5 -5
- paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +3 -2
- paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +8 -7
- paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +2 -1
- paddlex/modules/table_recognition/evaluator.py +3 -3
- paddlex/modules/table_recognition/exportor.py +1 -1
- paddlex/modules/table_recognition/model_list.py +1 -1
- paddlex/modules/table_recognition/trainer.py +2 -5
- paddlex/modules/text_detection/__init__.py +2 -2
- paddlex/modules/text_detection/dataset_checker/__init__.py +4 -6
- paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +12 -9
- paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +3 -3
- paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +3 -3
- paddlex/modules/text_detection/evaluator.py +3 -3
- paddlex/modules/text_detection/exportor.py +1 -1
- paddlex/modules/text_detection/model_list.py +3 -1
- paddlex/modules/text_detection/trainer.py +2 -5
- paddlex/modules/text_recognition/__init__.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/__init__.py +4 -5
- paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
- paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +2 -5
- paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
- paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
- paddlex/modules/text_recognition/evaluator.py +3 -3
- paddlex/modules/text_recognition/exportor.py +1 -1
- paddlex/modules/text_recognition/model_list.py +3 -1
- paddlex/modules/text_recognition/trainer.py +2 -3
- paddlex/modules/ts_anomaly_detection/__init__.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +1 -9
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_anomaly_detection/evaluator.py +3 -3
- paddlex/modules/ts_anomaly_detection/exportor.py +2 -3
- paddlex/modules/ts_anomaly_detection/model_list.py +1 -1
- paddlex/modules/ts_anomaly_detection/trainer.py +8 -8
- paddlex/modules/ts_classification/__init__.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -5
- paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +5 -5
- paddlex/modules/ts_classification/evaluator.py +3 -3
- paddlex/modules/ts_classification/exportor.py +2 -3
- paddlex/modules/ts_classification/model_list.py +1 -1
- paddlex/modules/ts_classification/trainer.py +7 -7
- paddlex/modules/ts_forecast/__init__.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +1 -9
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_forecast/evaluator.py +3 -3
- paddlex/modules/ts_forecast/exportor.py +2 -3
- paddlex/modules/ts_forecast/model_list.py +1 -1
- paddlex/modules/ts_forecast/trainer.py +7 -7
- paddlex/modules/video_classification/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +9 -9
- paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +2 -3
- paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/video_classification/evaluator.py +3 -3
- paddlex/modules/video_classification/exportor.py +1 -1
- paddlex/modules/video_classification/model_list.py +1 -1
- paddlex/modules/video_classification/trainer.py +3 -3
- paddlex/modules/video_detection/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +3 -5
- paddlex/modules/video_detection/evaluator.py +3 -3
- paddlex/modules/video_detection/exportor.py +1 -1
- paddlex/modules/video_detection/model_list.py +1 -1
- paddlex/modules/video_detection/trainer.py +3 -3
- paddlex/ops/__init__.py +7 -4
- paddlex/ops/iou3d_nms/iou3d_cpu.cpp +8 -6
- paddlex/ops/iou3d_nms/iou3d_cpu.h +3 -2
- paddlex/ops/iou3d_nms/iou3d_nms.cpp +8 -6
- paddlex/ops/iou3d_nms/iou3d_nms.h +6 -4
- paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +24 -18
- paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +9 -7
- paddlex/ops/setup.py +3 -3
- paddlex/ops/voxel/voxelize_op.cc +22 -19
- paddlex/ops/voxel/voxelize_op.cu +25 -25
- paddlex/paddlex_cli.py +104 -87
- paddlex/repo_apis/Paddle3D_api/__init__.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +6 -6
- paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +2 -2
- paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +1 -1
- paddlex/repo_apis/Paddle3D_api/pp3d_config.py +3 -2
- paddlex/repo_apis/PaddleClas_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/__init__.py +3 -3
- paddlex/repo_apis/PaddleClas_api/cls/config.py +5 -4
- paddlex/repo_apis/PaddleClas_api/cls/model.py +4 -4
- paddlex/repo_apis/PaddleClas_api/cls/register.py +12 -3
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +2 -3
- paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +1 -4
- paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +1 -6
- paddlex/repo_apis/PaddleDetection_api/__init__.py +2 -2
- paddlex/repo_apis/PaddleDetection_api/config_helper.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +2 -2
- paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +4 -4
- paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +5 -4
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +6 -7
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +26 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +32 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +2 -3
- paddlex/repo_apis/PaddleNLP_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/__init__.py +4 -3
- paddlex/repo_apis/PaddleOCR_api/config_utils.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +7 -6
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +9 -13
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +29 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +4 -4
- paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/config.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +4 -4
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +20 -3
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +7 -6
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +9 -13
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +20 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +2 -3
- paddlex/repo_apis/PaddleSeg_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +2 -2
- paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/config.py +3 -6
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +6 -6
- paddlex/repo_apis/PaddleSeg_api/seg/register.py +2 -3
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +2 -3
- paddlex/repo_apis/PaddleTS_api/__init__.py +4 -3
- paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +5 -6
- paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_base/config.py +2 -4
- paddlex/repo_apis/PaddleTS_api/ts_base/model.py +4 -4
- paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +4 -5
- paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +6 -7
- paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/config_utils.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +5 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +5 -4
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +5 -5
- paddlex/repo_apis/PaddleVideo_api/video_det/register.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +2 -3
- paddlex/repo_apis/__init__.py +1 -1
- paddlex/repo_apis/base/__init__.py +4 -5
- paddlex/repo_apis/base/config.py +3 -4
- paddlex/repo_apis/base/model.py +11 -19
- paddlex/repo_apis/base/register.py +1 -1
- paddlex/repo_apis/base/runner.py +11 -12
- paddlex/repo_apis/base/utils/__init__.py +1 -1
- paddlex/repo_apis/base/utils/arg.py +1 -1
- paddlex/repo_apis/base/utils/subprocess.py +1 -1
- paddlex/repo_manager/__init__.py +2 -9
- paddlex/repo_manager/core.py +12 -30
- paddlex/repo_manager/meta.py +41 -31
- paddlex/repo_manager/repo.py +171 -161
- paddlex/repo_manager/utils.py +13 -224
- paddlex/utils/__init__.py +1 -1
- paddlex/utils/cache.py +8 -10
- paddlex/utils/config.py +6 -5
- paddlex/utils/{custom_device_whitelist.py → custom_device_list.py} +53 -199
- paddlex/utils/deps.py +249 -0
- paddlex/utils/device.py +87 -36
- paddlex/utils/download.py +4 -4
- paddlex/utils/env.py +37 -7
- paddlex/utils/errors/__init__.py +1 -1
- paddlex/utils/errors/dataset_checker.py +1 -1
- paddlex/utils/errors/others.py +2 -16
- paddlex/utils/file_interface.py +4 -5
- paddlex/utils/flags.py +17 -12
- paddlex/utils/fonts/__init__.py +36 -5
- paddlex/utils/func_register.py +1 -1
- paddlex/utils/install.py +87 -0
- paddlex/utils/interactive_get_pipeline.py +3 -3
- paddlex/utils/lazy_loader.py +3 -3
- paddlex/utils/logging.py +10 -1
- paddlex/utils/misc.py +6 -6
- paddlex/utils/pipeline_arguments.py +15 -7
- paddlex/utils/result_saver.py +4 -5
- paddlex/utils/subclass_register.py +2 -4
- paddlex/version.py +2 -1
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/METADATA +237 -102
- paddlex-3.0.1.dist-info/RECORD +1095 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
- paddlex/inference/models/base/predictor/basic_predictor.py +0 -139
- paddlex/paddle2onnx_requirements.txt +0 -1
- paddlex/repo_manager/requirements.txt +0 -21
- paddlex/serving_requirements.txt +0 -9
- paddlex-3.0.0rc0.dist-info/RECORD +0 -1015
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info/licenses}/LICENSE +0 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -13,33 +13,25 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
import bisect
|
16
|
+
import functools
|
17
|
+
import inspect
|
16
18
|
import io
|
17
19
|
import itertools
|
18
20
|
import json
|
19
21
|
import os
|
20
22
|
import re
|
21
|
-
import six
|
22
|
-
import inspect
|
23
23
|
import unicodedata
|
24
|
-
import functools
|
25
24
|
from collections import OrderedDict
|
26
25
|
from dataclasses import asdict, dataclass
|
27
|
-
from
|
26
|
+
from functools import lru_cache
|
27
|
+
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
|
28
28
|
|
29
|
-
import numpy
|
30
29
|
import numpy as np
|
31
|
-
import lazy_paddle as paddle
|
32
|
-
from jinja2 import Template
|
33
|
-
from jinja2.exceptions import TemplateError, TemplateSyntaxError
|
34
|
-
from jinja2.sandbox import ImmutableSandboxedEnvironment
|
35
30
|
|
36
|
-
from .tokenizer_utils_base import CHAT_TEMPLATE_CONFIG_NAME
|
37
31
|
from .....utils import logging
|
38
|
-
|
39
|
-
from functools import lru_cache
|
40
|
-
|
41
|
-
from .vocab import Vocab
|
32
|
+
from .....utils.deps import class_requires_deps, is_dep_available
|
42
33
|
from .tokenizer_utils_base import (
|
34
|
+
CHAT_TEMPLATE_CONFIG_NAME,
|
43
35
|
AddedToken,
|
44
36
|
BatchEncoding,
|
45
37
|
EncodedInput,
|
@@ -54,6 +46,12 @@ from .tokenizer_utils_base import (
|
|
54
46
|
TruncationStrategy,
|
55
47
|
)
|
56
48
|
from .utils import convert_to_dict_message, fn_args_to_dict
|
49
|
+
from .vocab import Vocab
|
50
|
+
|
51
|
+
if is_dep_available("Jinja2"):
|
52
|
+
from jinja2 import Template
|
53
|
+
from jinja2.exceptions import TemplateError, TemplateSyntaxError
|
54
|
+
from jinja2.sandbox import ImmutableSandboxedEnvironment
|
57
55
|
|
58
56
|
__all__ = [
|
59
57
|
"ChatTemplate",
|
@@ -64,6 +62,7 @@ __all__ = [
|
|
64
62
|
]
|
65
63
|
|
66
64
|
|
65
|
+
@class_requires_deps("Jinja2")
|
67
66
|
@dataclass
|
68
67
|
class ChatTemplate:
|
69
68
|
conversation: Union[List[str], None] = None
|
@@ -72,7 +71,7 @@ class ChatTemplate:
|
|
72
71
|
|
73
72
|
@staticmethod
|
74
73
|
@lru_cache()
|
75
|
-
def _compile_jinja_template(chat_template) -> Template:
|
74
|
+
def _compile_jinja_template(chat_template) -> "Template":
|
76
75
|
def raise_exception(message):
|
77
76
|
raise TemplateError(message)
|
78
77
|
|
@@ -204,6 +203,7 @@ def adapt_stale_fwd_patch(self, name, value):
|
|
204
203
|
model compression, we make these patches compatible with the latest forward
|
205
204
|
method.
|
206
205
|
"""
|
206
|
+
|
207
207
|
if name == "forward":
|
208
208
|
# NOTE(guosheng): In dygraph to static, `layer.forward` would be patched
|
209
209
|
# by an instance of `StaticFunction`. And use string compare to avoid to
|
@@ -231,13 +231,15 @@ def adapt_stale_fwd_patch(self, name, value):
|
|
231
231
|
]
|
232
232
|
|
233
233
|
if new_args:
|
234
|
+
import paddle
|
235
|
+
|
234
236
|
if self.__module__.startswith("paddlenlp"):
|
235
237
|
logging.warning(
|
236
238
|
f"The `forward` method of {self.__class__ if isinstance(self, paddle.nn.Layer) else self} is patched and the patch "
|
237
239
|
"might be based on an old oversion which missing some "
|
238
240
|
f"arguments compared with the latest, such as {new_args}. "
|
239
241
|
"We automatically add compatibility on the patch for "
|
240
|
-
"these
|
242
|
+
"these arguments, and maybe the patch should be updated."
|
241
243
|
)
|
242
244
|
else:
|
243
245
|
logging.warning(
|
@@ -245,7 +247,7 @@ def adapt_stale_fwd_patch(self, name, value):
|
|
245
247
|
"is patched and the patch might be conflict with patches made "
|
246
248
|
f"by paddlenlp which seems have more arguments such as {new_args}. "
|
247
249
|
"We automatically add compatibility on the patch for "
|
248
|
-
"these
|
250
|
+
"these arguments, and maybe the patch should be updated."
|
249
251
|
)
|
250
252
|
if isinstance(self, paddle.nn.Layer) and inspect.isfunction(value):
|
251
253
|
|
@@ -288,8 +290,8 @@ class InitTrackerMeta(type):
|
|
288
290
|
|
289
291
|
def __init__(cls, name, bases, attrs):
|
290
292
|
init_func = cls.__init__
|
291
|
-
# If attrs has `__init__`, wrap it using
|
292
|
-
# Otherwise, no need to wrap again since the super cls has been
|
293
|
+
# If attrs has `__init__`, wrap it using accessible `_pre_init, _post_init`.
|
294
|
+
# Otherwise, no need to wrap again since the super cls has been wrapped.
|
293
295
|
# TODO: remove reduplicated tracker if using super cls `__init__`
|
294
296
|
pre_init_func = getattr(cls, "_pre_init", None) if "__init__" in attrs else None
|
295
297
|
post_init_func = (
|
@@ -321,12 +323,12 @@ class InitTrackerMeta(type):
|
|
321
323
|
|
322
324
|
@functools.wraps(init_func)
|
323
325
|
def __impl__(self, *args, **kwargs):
|
324
|
-
#
|
326
|
+
# registered helper by `pre_init_func`
|
325
327
|
if pre_init_func:
|
326
328
|
pre_init_func(self, init_func, *args, **kwargs)
|
327
329
|
# keep full configuration
|
328
330
|
init_func(self, *args, **kwargs)
|
329
|
-
#
|
331
|
+
# registered helper by `post_init_func`
|
330
332
|
if post_init_func:
|
331
333
|
post_init_func(self, init_func, *args, **kwargs)
|
332
334
|
self.init_config = kwargs
|
@@ -586,7 +588,7 @@ def _is_control(char):
|
|
586
588
|
|
587
589
|
|
588
590
|
def _is_nonnormalized_char(char):
|
589
|
-
"""Check
|
591
|
+
"""Check whether `chars` is a non-normalized character."""
|
590
592
|
cp = ord(char)
|
591
593
|
if (
|
592
594
|
(0xFF00 <= cp <= 0xFFEF)
|
@@ -641,20 +643,20 @@ class ChatTemplateMixin:
|
|
641
643
|
|
642
644
|
def apply_chat_template(
|
643
645
|
self,
|
644
|
-
conversation: Union[Dict[str, str], str],
|
646
|
+
conversation: Union[List[List[str]], Dict[str, str], str],
|
645
647
|
tokenize: bool = True,
|
646
648
|
context_data: Dict[str, Any] = {},
|
647
649
|
**tokenizer_kwargs,
|
648
|
-
)
|
650
|
+
):
|
649
651
|
"""apply chat_template rules to conversation which should not be batched data
|
650
652
|
|
651
653
|
Args:
|
652
|
-
conversation (List[List[str
|
654
|
+
conversation (List[List[str]] , str): the conversation messages between user and bot
|
653
655
|
context_data (Dict[str, Any]): the context data for chat_template.json
|
654
656
|
tokenize (bool, optional): whether do tokenization. Defaults to True.
|
655
657
|
|
656
658
|
Returns:
|
657
|
-
str | dict[str, Union[
|
659
|
+
str | dict[str, Union[numpy.ndarray, paddle.Tensor]]: return the result of applied data
|
658
660
|
"""
|
659
661
|
if not self.chat_template:
|
660
662
|
raise ValueError(
|
@@ -677,16 +679,16 @@ class ChatTemplateMixin:
|
|
677
679
|
|
678
680
|
def _apply_chat_template_paddle(
|
679
681
|
self,
|
680
|
-
conversation: Union[List[
|
682
|
+
conversation: Union[List[List[str]], str],
|
681
683
|
context_data: Dict[str, Any] = {},
|
682
|
-
)
|
684
|
+
):
|
683
685
|
context_data = self.chat_template._init_context_data(context_data)
|
684
686
|
|
685
687
|
if isinstance(conversation, str):
|
686
688
|
conversation = [[conversation]]
|
687
689
|
elif isinstance(conversation, list) and isinstance(conversation[0], str):
|
688
690
|
raise ValueError(
|
689
|
-
"apply_chat_template do not support
|
691
|
+
"apply_chat_template do not support applying batch conversations, "
|
690
692
|
"so you should apply the conversation one by one."
|
691
693
|
)
|
692
694
|
|
@@ -695,9 +697,9 @@ class ChatTemplateMixin:
|
|
695
697
|
|
696
698
|
def _apply_chat_template(
|
697
699
|
self,
|
698
|
-
conversation: Union[Dict[str, str], str],
|
700
|
+
conversation: Union[List[List[str]], Dict[str, str], str],
|
699
701
|
add_generation_prompt=True,
|
700
|
-
)
|
702
|
+
):
|
701
703
|
if isinstance(conversation, str):
|
702
704
|
conversations = [{"role": "user", "content": conversation}]
|
703
705
|
elif isinstance(conversation, list):
|
@@ -708,7 +710,7 @@ class ChatTemplateMixin:
|
|
708
710
|
conversations = conversation
|
709
711
|
else:
|
710
712
|
raise ValueError(
|
711
|
-
"apply_chat_template do not support
|
713
|
+
"apply_chat_template do not support applying batch conversations, "
|
712
714
|
"so you should apply the conversation one by one."
|
713
715
|
)
|
714
716
|
query = self.chat_template.render(
|
@@ -720,7 +722,7 @@ class ChatTemplateMixin:
|
|
720
722
|
|
721
723
|
def encode_chat_inputs(
|
722
724
|
self,
|
723
|
-
conversations: List[
|
725
|
+
conversations: List[List[str]],
|
724
726
|
context_data: Dict[str, Any] = {},
|
725
727
|
**kwargs,
|
726
728
|
):
|
@@ -729,7 +731,7 @@ class ChatTemplateMixin:
|
|
729
731
|
Turn t: sep + bot + query bot + eos
|
730
732
|
|
731
733
|
Args:
|
732
|
-
conversation (List[
|
734
|
+
conversation (List[List[str]]): the conversation of data
|
733
735
|
context_data (Dict[str, Any]): the context data of conversation
|
734
736
|
|
735
737
|
Returns:
|
@@ -749,7 +751,7 @@ class ChatTemplateMixin:
|
|
749
751
|
return query
|
750
752
|
|
751
753
|
def _encode_chat_inputs_paddle(
|
752
|
-
self, conversations: List[
|
754
|
+
self, conversations: List[List[str]], context_data: Dict[str, Any] = {}
|
753
755
|
):
|
754
756
|
context_data = self.chat_template._init_context_data(context_data)
|
755
757
|
# encode system
|
@@ -779,7 +781,7 @@ class ChatTemplateMixin:
|
|
779
781
|
|
780
782
|
def _encode_chat_inputs(
|
781
783
|
self,
|
782
|
-
conversations: List[
|
784
|
+
conversations: List[List[str]],
|
783
785
|
context_data: Dict[str, Any] = {},
|
784
786
|
system: str = None,
|
785
787
|
add_generation_prompt=True,
|
@@ -824,7 +826,9 @@ class ChatTemplateMixin:
|
|
824
826
|
ans.append(ans_roundi)
|
825
827
|
|
826
828
|
non_learnable_parts = self._extract_non_learnable_parts(origin_msg, ans)
|
827
|
-
assert len(non_learnable_parts) == len(
|
829
|
+
assert len(non_learnable_parts) == len(
|
830
|
+
ans
|
831
|
+
), f"Get non_learnable_parts len: {len(non_learnable_parts)}, but ans len: {len(ans)}."
|
828
832
|
|
829
833
|
conversation_ids = []
|
830
834
|
for i in range(len(non_learnable_parts)):
|
@@ -843,7 +847,7 @@ class ChatTemplateMixin:
|
|
843
847
|
self, origin_msg: List[Dict[str, str]], split_s: List[str]
|
844
848
|
):
|
845
849
|
"""Split the entire chat by specified words. Extract the non-learnable parts."""
|
846
|
-
#
|
850
|
+
# distinguish and replace the special words in original string to an uncompiled form: Like | -> \|
|
847
851
|
regex_pattern = "|".join(map(re.escape, split_s))
|
848
852
|
# splited by replaced specified words
|
849
853
|
non_learnable_parts = re.split(
|
@@ -893,11 +897,11 @@ class ChatTemplateMixin:
|
|
893
897
|
tokenizer.init_chat_template(chat_template_file)
|
894
898
|
return tokenizer
|
895
899
|
|
896
|
-
def init_chat_template(self, chat_template: Union[str,
|
900
|
+
def init_chat_template(self, chat_template: Union[str, dict]):
|
897
901
|
"""init chat_tempalte by file_path or template dict data
|
898
902
|
|
899
903
|
Args:
|
900
|
-
chat_template (str
|
904
|
+
chat_template (str, dict): file_path or template dict data
|
901
905
|
"""
|
902
906
|
if isinstance(chat_template, str):
|
903
907
|
if not os.path.exists(chat_template):
|
@@ -934,8 +938,9 @@ class ChatTemplateMixin:
|
|
934
938
|
logging.info("Chat-template config file saved in " + chat_template_file)
|
935
939
|
|
936
940
|
|
937
|
-
|
938
|
-
|
941
|
+
class PretrainedTokenizer(
|
942
|
+
ChatTemplateMixin, PretrainedTokenizerBase, metaclass=InitTrackerMeta
|
943
|
+
):
|
939
944
|
"""
|
940
945
|
Base class for all tokenizers.
|
941
946
|
|
@@ -992,8 +997,12 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
992
997
|
init_dict.pop("self", None)
|
993
998
|
super(PretrainedTokenizer, self).__init__(**init_dict)
|
994
999
|
|
995
|
-
self.
|
996
|
-
self.added_tokens_decoder
|
1000
|
+
self.added_tokens_decoder: Dict[int, AddedToken] = {}
|
1001
|
+
self.added_tokens_decoder.update(kwargs.pop("added_tokens_decoder", {}))
|
1002
|
+
self.added_tokens_encoder: Dict[str, int] = {
|
1003
|
+
k.content: v for v, k in self.added_tokens_decoder.items()
|
1004
|
+
}
|
1005
|
+
|
997
1006
|
self.unique_no_split_tokens: List[str] = []
|
998
1007
|
self.tokens_trie = Trie()
|
999
1008
|
|
@@ -1091,6 +1100,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1091
1100
|
and self.convert_tokens_to_ids(token)
|
1092
1101
|
== self.convert_tokens_to_ids(self.unk_token)
|
1093
1102
|
and token not in tokens_to_add
|
1103
|
+
and token not in self.added_tokens_encoder.keys()
|
1094
1104
|
):
|
1095
1105
|
tokens_to_add.append(token)
|
1096
1106
|
if self.verbose:
|
@@ -1179,6 +1189,11 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1179
1189
|
Returns:
|
1180
1190
|
`List[str]`: The list of tokens.
|
1181
1191
|
"""
|
1192
|
+
|
1193
|
+
split_special_tokens = kwargs.pop(
|
1194
|
+
"split_special_tokens", self.split_special_tokens
|
1195
|
+
)
|
1196
|
+
|
1182
1197
|
# Simple mapping string => AddedToken for special tokens with specific tokenization behaviors
|
1183
1198
|
all_special_tokens_extended = dict(
|
1184
1199
|
(str(t), t)
|
@@ -1200,8 +1215,15 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1200
1215
|
pattern, lambda m: m.groups()[0] or m.groups()[1].lower(), text
|
1201
1216
|
)
|
1202
1217
|
|
1203
|
-
|
1204
|
-
|
1218
|
+
if split_special_tokens:
|
1219
|
+
no_split_token = []
|
1220
|
+
tokens = [text]
|
1221
|
+
else:
|
1222
|
+
no_split_token = set(
|
1223
|
+
self.unique_no_split_tokens
|
1224
|
+
) # don't split on any of the added tokens
|
1225
|
+
# "This is something<special_token_1> else"
|
1226
|
+
tokens = self.tokens_trie.split(text)
|
1205
1227
|
|
1206
1228
|
# ["This is something", "<special_token_1>", " else"]
|
1207
1229
|
for i, token in enumerate(tokens):
|
@@ -1286,7 +1308,9 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1286
1308
|
def convert_ids_to_tokens(self, ids, skip_special_tokens=False):
|
1287
1309
|
if isinstance(ids, int):
|
1288
1310
|
if ids in self.added_tokens_decoder:
|
1289
|
-
|
1311
|
+
token = self.added_tokens_decoder[ids]
|
1312
|
+
token = token.content if isinstance(token, AddedToken) else token
|
1313
|
+
return token
|
1290
1314
|
else:
|
1291
1315
|
return self._convert_id_to_token(ids)
|
1292
1316
|
tokens = []
|
@@ -1295,7 +1319,9 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1295
1319
|
if skip_special_tokens and index in self.all_special_ids:
|
1296
1320
|
continue
|
1297
1321
|
if index in self.added_tokens_decoder:
|
1298
|
-
|
1322
|
+
token = self.added_tokens_decoder[index]
|
1323
|
+
token = token.content if isinstance(token, AddedToken) else token
|
1324
|
+
tokens.append(token)
|
1299
1325
|
else:
|
1300
1326
|
tokens.append(self._convert_id_to_token(index))
|
1301
1327
|
return tokens
|
@@ -1427,6 +1453,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1427
1453
|
stride: int = 0,
|
1428
1454
|
is_split_into_words: bool = False,
|
1429
1455
|
pad_to_multiple_of: Optional[int] = None,
|
1456
|
+
padding_side: Optional[Literal["right", "left"]] = None,
|
1430
1457
|
return_tensors: Optional[Union[str, TensorType]] = None,
|
1431
1458
|
return_position_ids: Optional[bool] = None,
|
1432
1459
|
return_token_type_ids: Optional[bool] = None,
|
@@ -1491,6 +1518,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1491
1518
|
max_length=max_length,
|
1492
1519
|
stride=stride,
|
1493
1520
|
pad_to_multiple_of=pad_to_multiple_of,
|
1521
|
+
padding_side=padding_side,
|
1494
1522
|
return_tensors=return_tensors,
|
1495
1523
|
prepend_batch_axis=True,
|
1496
1524
|
return_position_ids=return_position_ids,
|
@@ -1521,6 +1549,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1521
1549
|
stride: int = 0,
|
1522
1550
|
is_split_into_words: bool = False,
|
1523
1551
|
pad_to_multiple_of: Optional[int] = None,
|
1552
|
+
padding_side: Optional[Literal["right", "left"]] = None,
|
1524
1553
|
return_position_ids: Optional[bool] = None,
|
1525
1554
|
return_tensors: Optional[Union[str, TensorType]] = None,
|
1526
1555
|
return_token_type_ids: Optional[bool] = None,
|
@@ -1606,6 +1635,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1606
1635
|
max_length=max_length,
|
1607
1636
|
stride=stride,
|
1608
1637
|
pad_to_multiple_of=pad_to_multiple_of,
|
1638
|
+
padding_side=padding_side,
|
1609
1639
|
return_position_ids=return_position_ids,
|
1610
1640
|
return_attention_mask=return_attention_mask,
|
1611
1641
|
return_token_type_ids=return_token_type_ids,
|
@@ -1630,6 +1660,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1630
1660
|
max_length: Optional[int] = None,
|
1631
1661
|
stride: int = 0,
|
1632
1662
|
pad_to_multiple_of: Optional[int] = None,
|
1663
|
+
padding_side: Optional[Literal["right", "left"]] = None,
|
1633
1664
|
return_position_ids: Optional[bool] = None,
|
1634
1665
|
return_tensors: Optional[str] = None,
|
1635
1666
|
return_token_type_ids: Optional[bool] = None,
|
@@ -1707,7 +1738,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1707
1738
|
[0] * len(pair_ids) if pair else []
|
1708
1739
|
)
|
1709
1740
|
encoded_inputs["offset_mapping"] = offset_mapping
|
1710
|
-
# Build output
|
1741
|
+
# Build output dictionary
|
1711
1742
|
encoded_inputs["input_ids"] = sequence
|
1712
1743
|
if return_token_type_ids:
|
1713
1744
|
encoded_inputs["token_type_ids"] = token_type_ids
|
@@ -1758,6 +1789,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1758
1789
|
max_length=max_length,
|
1759
1790
|
stride=stride,
|
1760
1791
|
pad_to_multiple_of=None, # we pad in batch afterward
|
1792
|
+
padding_side=padding_side, # we pad in batch afterward
|
1761
1793
|
return_position_ids=return_position_ids, # we pad in batch afterward
|
1762
1794
|
return_attention_mask=False, # we pad in batch afterward
|
1763
1795
|
return_token_type_ids=return_token_type_ids,
|
@@ -1780,6 +1812,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1780
1812
|
padding=padding_strategy.value,
|
1781
1813
|
max_length=max_length,
|
1782
1814
|
pad_to_multiple_of=pad_to_multiple_of,
|
1815
|
+
padding_side=padding_side,
|
1783
1816
|
return_attention_mask=return_attention_mask,
|
1784
1817
|
)
|
1785
1818
|
if return_dict:
|
@@ -2028,31 +2061,6 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
2028
2061
|
else:
|
2029
2062
|
return text
|
2030
2063
|
|
2031
|
-
def decode_token(
|
2032
|
-
self,
|
2033
|
-
all_input_ids: List[int],
|
2034
|
-
prefix_offset: int = 0,
|
2035
|
-
read_offset: int = 0,
|
2036
|
-
) -> Tuple[str, int, int]:
|
2037
|
-
"""tokenizer decoding for the streaming generation use case. This method can be overrided for tokenizer that doesn't follow this API"""
|
2038
|
-
# The prefix text is necessary only to defeat cleanup algorithms in the decode
|
2039
|
-
# which decide to add a space or not depending on the surrounding ids.
|
2040
|
-
prefix_text = self.decode(
|
2041
|
-
all_input_ids[prefix_offset:read_offset], skip_special_tokens=False
|
2042
|
-
)
|
2043
|
-
new_text = self.decode(all_input_ids[prefix_offset:], skip_special_tokens=False)
|
2044
|
-
|
2045
|
-
if len(new_text) > len(prefix_text) and not new_text.endswith("�"):
|
2046
|
-
# utf-8 char at the end means it's a potential unfinished byte sequence
|
2047
|
-
# from byte fallback tokenization.
|
2048
|
-
# If it's in the middle, it's probably a real invalid id generated
|
2049
|
-
# by the model
|
2050
|
-
prefix_index = new_text.index(prefix_text)
|
2051
|
-
new_text = new_text[prefix_index + len(prefix_text) :]
|
2052
|
-
return new_text, read_offset, len(all_input_ids)
|
2053
|
-
else:
|
2054
|
-
return "", prefix_offset, read_offset
|
2055
|
-
|
2056
2064
|
|
2057
2065
|
def _is_control(char):
|
2058
2066
|
"""Checks whether `chars` is a control character."""
|
@@ -2100,7 +2108,7 @@ def _is_whitespace(char):
|
|
2100
2108
|
"""
|
2101
2109
|
Checks whether `chars` is a whitespace character.
|
2102
2110
|
"""
|
2103
|
-
# \t, \n, and \r are technically
|
2111
|
+
# \t, \n, and \r are technically control characters but we treat them
|
2104
2112
|
# as whitespace since they are generally considered as such.
|
2105
2113
|
if char == " " or char == "\t" or char == "\n" or char == "\r":
|
2106
2114
|
return True
|
@@ -2128,7 +2136,7 @@ def convert_to_unicode(text):
|
|
2128
2136
|
|
2129
2137
|
def whitespace_tokenize(text):
|
2130
2138
|
"""
|
2131
|
-
Runs basic whitespace cleaning and splitting on a
|
2139
|
+
Runs basic whitespace cleaning and splitting on a piece of text.
|
2132
2140
|
Args:
|
2133
2141
|
text (str): Text to be tokenized.
|
2134
2142
|
Returns:
|