paddlex 3.0.0rc0__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/__init__.py +17 -34
- paddlex/__main__.py +1 -1
- paddlex/configs/modules/doc_vlm/PP-DocBee-2B.yaml +14 -0
- paddlex/configs/modules/doc_vlm/PP-DocBee-7B.yaml +14 -0
- paddlex/configs/modules/open_vocabulary_detection/YOLO-Worldv2-L.yaml +13 -0
- paddlex/configs/pipelines/anomaly_detection.yaml +1 -1
- paddlex/configs/pipelines/doc_understanding.yaml +9 -0
- paddlex/configs/pipelines/ts_anomaly_detection.yaml +1 -1
- paddlex/configs/pipelines/ts_classification.yaml +1 -1
- paddlex/configs/pipelines/ts_forecast.yaml +1 -1
- paddlex/constants.py +17 -0
- paddlex/engine.py +7 -5
- paddlex/hpip_links.html +23 -11
- paddlex/inference/__init__.py +3 -3
- paddlex/inference/common/__init__.py +1 -1
- paddlex/inference/common/batch_sampler/__init__.py +5 -4
- paddlex/inference/common/batch_sampler/audio_batch_sampler.py +5 -6
- paddlex/inference/common/batch_sampler/base_batch_sampler.py +20 -16
- paddlex/inference/common/batch_sampler/det_3d_batch_sampler.py +4 -7
- paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +64 -0
- paddlex/inference/common/batch_sampler/image_batch_sampler.py +12 -36
- paddlex/inference/common/batch_sampler/ts_batch_sampler.py +9 -10
- paddlex/inference/common/batch_sampler/video_batch_sampler.py +2 -22
- paddlex/inference/common/reader/__init__.py +4 -4
- paddlex/inference/common/reader/audio_reader.py +3 -3
- paddlex/inference/common/reader/det_3d_reader.py +7 -5
- paddlex/inference/common/reader/image_reader.py +16 -12
- paddlex/inference/common/reader/ts_reader.py +3 -2
- paddlex/inference/common/reader/video_reader.py +3 -3
- paddlex/inference/common/result/__init__.py +7 -7
- paddlex/inference/common/result/base_cv_result.py +12 -2
- paddlex/inference/common/result/base_result.py +7 -5
- paddlex/inference/common/result/base_ts_result.py +1 -2
- paddlex/inference/common/result/base_video_result.py +2 -2
- paddlex/inference/common/result/mixin.py +12 -13
- paddlex/inference/models/__init__.py +41 -85
- paddlex/inference/models/anomaly_detection/__init__.py +1 -1
- paddlex/inference/models/anomaly_detection/predictor.py +9 -19
- paddlex/inference/models/anomaly_detection/processors.py +9 -2
- paddlex/inference/models/anomaly_detection/result.py +3 -2
- paddlex/inference/models/base/__init__.py +2 -2
- paddlex/inference/models/base/predictor/__init__.py +1 -2
- paddlex/inference/models/base/predictor/base_predictor.py +284 -39
- paddlex/inference/models/common/__init__.py +6 -15
- paddlex/inference/models/common/static_infer.py +764 -243
- paddlex/inference/models/common/tokenizer/__init__.py +5 -3
- paddlex/inference/models/common/tokenizer/bert_tokenizer.py +1 -1
- paddlex/inference/models/common/tokenizer/clip_tokenizer.py +609 -0
- paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +7 -5
- paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +432 -0
- paddlex/inference/models/common/tokenizer/tokenizer_utils.py +72 -64
- paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +337 -121
- paddlex/inference/models/common/tokenizer/utils.py +1 -1
- paddlex/inference/models/common/tokenizer/vocab.py +1 -1
- paddlex/inference/models/common/ts/__init__.py +1 -1
- paddlex/inference/models/common/ts/funcs.py +13 -6
- paddlex/inference/models/common/ts/processors.py +14 -5
- paddlex/inference/models/common/vision/__init__.py +3 -3
- paddlex/inference/models/common/vision/funcs.py +17 -12
- paddlex/inference/models/common/vision/processors.py +61 -46
- paddlex/inference/models/common/vlm/__init__.py +13 -0
- paddlex/inference/models/common/vlm/activations.py +189 -0
- paddlex/inference/models/common/vlm/bert_padding.py +127 -0
- paddlex/inference/models/common/vlm/distributed.py +229 -0
- paddlex/inference/models/common/vlm/flash_attn_utils.py +119 -0
- paddlex/inference/models/common/vlm/generation/__init__.py +34 -0
- paddlex/inference/models/common/vlm/generation/configuration_utils.py +533 -0
- paddlex/inference/models/common/vlm/generation/logits_process.py +730 -0
- paddlex/inference/models/common/vlm/generation/stopping_criteria.py +106 -0
- paddlex/inference/models/common/vlm/generation/utils.py +2162 -0
- paddlex/inference/models/common/vlm/transformers/__init__.py +16 -0
- paddlex/inference/models/common/vlm/transformers/configuration_utils.py +1037 -0
- paddlex/inference/models/common/vlm/transformers/conversion_utils.py +408 -0
- paddlex/inference/models/common/vlm/transformers/model_outputs.py +1612 -0
- paddlex/inference/models/common/vlm/transformers/model_utils.py +2038 -0
- paddlex/inference/models/common/vlm/transformers/utils.py +178 -0
- paddlex/inference/models/common/vlm/utils.py +109 -0
- paddlex/inference/models/doc_vlm/__init__.py +15 -0
- paddlex/inference/models/doc_vlm/modeling/__init__.py +15 -0
- paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +2600 -0
- paddlex/inference/models/doc_vlm/predictor.py +198 -0
- paddlex/inference/models/doc_vlm/processors/__init__.py +15 -0
- paddlex/inference/models/doc_vlm/processors/common.py +372 -0
- paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +698 -0
- paddlex/inference/models/doc_vlm/result.py +21 -0
- paddlex/inference/models/face_feature/__init__.py +1 -1
- paddlex/inference/models/face_feature/predictor.py +2 -1
- paddlex/inference/models/formula_recognition/__init__.py +1 -1
- paddlex/inference/models/formula_recognition/predictor.py +11 -27
- paddlex/inference/models/formula_recognition/processors.py +35 -19
- paddlex/inference/models/formula_recognition/result.py +19 -12
- paddlex/inference/models/image_classification/__init__.py +1 -1
- paddlex/inference/models/image_classification/predictor.py +9 -19
- paddlex/inference/models/image_classification/processors.py +4 -2
- paddlex/inference/models/image_classification/result.py +4 -3
- paddlex/inference/models/image_feature/__init__.py +1 -1
- paddlex/inference/models/image_feature/predictor.py +9 -19
- paddlex/inference/models/image_feature/processors.py +4 -1
- paddlex/inference/models/image_feature/result.py +2 -3
- paddlex/inference/models/image_multilabel_classification/__init__.py +1 -1
- paddlex/inference/models/image_multilabel_classification/predictor.py +7 -6
- paddlex/inference/models/image_multilabel_classification/processors.py +6 -2
- paddlex/inference/models/image_multilabel_classification/result.py +4 -3
- paddlex/inference/models/image_unwarping/__init__.py +1 -1
- paddlex/inference/models/image_unwarping/predictor.py +8 -16
- paddlex/inference/models/image_unwarping/processors.py +6 -2
- paddlex/inference/models/image_unwarping/result.py +4 -2
- paddlex/inference/models/instance_segmentation/__init__.py +1 -1
- paddlex/inference/models/instance_segmentation/predictor.py +7 -15
- paddlex/inference/models/instance_segmentation/processors.py +4 -7
- paddlex/inference/models/instance_segmentation/result.py +11 -10
- paddlex/inference/models/keypoint_detection/__init__.py +1 -1
- paddlex/inference/models/keypoint_detection/predictor.py +2 -3
- paddlex/inference/models/keypoint_detection/processors.py +11 -3
- paddlex/inference/models/keypoint_detection/result.py +9 -4
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/predictor.py +15 -26
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/processors.py +26 -14
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/result.py +15 -12
- paddlex/inference/models/{3d_bev_detection → m_3d_bev_detection}/visualizer_3d.py +77 -39
- paddlex/inference/models/multilingual_speech_recognition/__init__.py +1 -1
- paddlex/inference/models/multilingual_speech_recognition/predictor.py +11 -15
- paddlex/inference/models/multilingual_speech_recognition/processors.py +45 -53
- paddlex/inference/models/multilingual_speech_recognition/result.py +1 -1
- paddlex/inference/models/object_detection/__init__.py +1 -1
- paddlex/inference/models/object_detection/predictor.py +6 -12
- paddlex/inference/models/object_detection/processors.py +36 -31
- paddlex/inference/models/object_detection/result.py +5 -4
- paddlex/inference/models/object_detection/utils.py +1 -1
- paddlex/inference/models/open_vocabulary_detection/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_detection/predictor.py +31 -14
- paddlex/inference/models/open_vocabulary_detection/processors/__init__.py +3 -2
- paddlex/inference/models/open_vocabulary_detection/processors/common.py +114 -0
- paddlex/inference/models/open_vocabulary_detection/processors/groundingdino_processors.py +19 -8
- paddlex/inference/models/open_vocabulary_detection/processors/yoloworld_processors.py +209 -0
- paddlex/inference/models/open_vocabulary_segmentation/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/predictor.py +6 -13
- paddlex/inference/models/open_vocabulary_segmentation/processors/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/processors/sam_processer.py +12 -12
- paddlex/inference/models/open_vocabulary_segmentation/results/__init__.py +1 -1
- paddlex/inference/models/open_vocabulary_segmentation/results/sam_result.py +11 -9
- paddlex/inference/models/semantic_segmentation/__init__.py +1 -1
- paddlex/inference/models/semantic_segmentation/predictor.py +9 -18
- paddlex/inference/models/semantic_segmentation/processors.py +11 -8
- paddlex/inference/models/semantic_segmentation/result.py +4 -3
- paddlex/inference/models/table_structure_recognition/__init__.py +1 -1
- paddlex/inference/models/table_structure_recognition/predictor.py +8 -18
- paddlex/inference/models/table_structure_recognition/processors.py +23 -29
- paddlex/inference/models/table_structure_recognition/result.py +9 -6
- paddlex/inference/models/text_detection/__init__.py +1 -1
- paddlex/inference/models/text_detection/predictor.py +16 -24
- paddlex/inference/models/text_detection/processors.py +74 -36
- paddlex/inference/models/text_detection/result.py +9 -4
- paddlex/inference/models/text_recognition/__init__.py +1 -1
- paddlex/inference/models/text_recognition/predictor.py +11 -19
- paddlex/inference/models/text_recognition/processors.py +27 -13
- paddlex/inference/models/text_recognition/result.py +3 -2
- paddlex/inference/models/ts_anomaly_detection/__init__.py +1 -1
- paddlex/inference/models/ts_anomaly_detection/predictor.py +12 -17
- paddlex/inference/models/ts_anomaly_detection/processors.py +6 -2
- paddlex/inference/models/ts_anomaly_detection/result.py +21 -10
- paddlex/inference/models/ts_classification/__init__.py +1 -1
- paddlex/inference/models/ts_classification/predictor.py +14 -27
- paddlex/inference/models/ts_classification/processors.py +7 -2
- paddlex/inference/models/ts_classification/result.py +21 -12
- paddlex/inference/models/ts_forecasting/__init__.py +1 -1
- paddlex/inference/models/ts_forecasting/predictor.py +13 -18
- paddlex/inference/models/ts_forecasting/processors.py +12 -3
- paddlex/inference/models/ts_forecasting/result.py +24 -11
- paddlex/inference/models/video_classification/__init__.py +1 -1
- paddlex/inference/models/video_classification/predictor.py +9 -15
- paddlex/inference/models/video_classification/processors.py +24 -24
- paddlex/inference/models/video_classification/result.py +7 -3
- paddlex/inference/models/video_detection/__init__.py +1 -1
- paddlex/inference/models/video_detection/predictor.py +8 -15
- paddlex/inference/models/video_detection/processors.py +24 -11
- paddlex/inference/models/video_detection/result.py +10 -5
- paddlex/inference/pipelines/__init__.py +44 -37
- paddlex/inference/pipelines/anomaly_detection/__init__.py +1 -1
- paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/attribute_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/attribute_recognition/pipeline.py +13 -8
- paddlex/inference/pipelines/attribute_recognition/result.py +10 -8
- paddlex/inference/pipelines/base.py +31 -11
- paddlex/inference/pipelines/components/__init__.py +14 -8
- paddlex/inference/pipelines/components/chat_server/__init__.py +1 -1
- paddlex/inference/pipelines/components/chat_server/base.py +2 -2
- paddlex/inference/pipelines/components/chat_server/openai_bot_chat.py +8 -8
- paddlex/inference/pipelines/components/common/__init__.py +5 -4
- paddlex/inference/pipelines/components/common/base_operator.py +2 -1
- paddlex/inference/pipelines/components/common/base_result.py +3 -2
- paddlex/inference/pipelines/components/common/convert_points_and_boxes.py +1 -2
- paddlex/inference/pipelines/components/common/crop_image_regions.py +11 -5
- paddlex/inference/pipelines/components/common/seal_det_warp.py +44 -13
- paddlex/inference/pipelines/components/common/sort_boxes.py +4 -2
- paddlex/inference/pipelines/components/common/warp_image.py +50 -0
- paddlex/inference/pipelines/components/faisser.py +9 -4
- paddlex/inference/pipelines/components/prompt_engineering/__init__.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/base.py +2 -2
- paddlex/inference/pipelines/components/prompt_engineering/generate_ensemble_prompt.py +2 -1
- paddlex/inference/pipelines/components/prompt_engineering/generate_kie_prompt.py +2 -2
- paddlex/inference/pipelines/components/retriever/__init__.py +2 -2
- paddlex/inference/pipelines/components/retriever/base.py +18 -16
- paddlex/inference/pipelines/components/retriever/openai_bot_retriever.py +2 -2
- paddlex/inference/pipelines/components/retriever/qianfan_bot_retriever.py +87 -84
- paddlex/inference/pipelines/components/utils/__init__.py +1 -1
- paddlex/inference/pipelines/components/utils/mixin.py +7 -7
- paddlex/inference/pipelines/doc_preprocessor/__init__.py +1 -1
- paddlex/inference/pipelines/doc_preprocessor/pipeline.py +21 -28
- paddlex/inference/pipelines/doc_preprocessor/result.py +5 -10
- paddlex/inference/pipelines/doc_understanding/__init__.py +15 -0
- paddlex/inference/pipelines/doc_understanding/pipeline.py +71 -0
- paddlex/inference/pipelines/face_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/face_recognition/pipeline.py +3 -1
- paddlex/inference/pipelines/face_recognition/result.py +3 -2
- paddlex/inference/pipelines/formula_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/formula_recognition/pipeline.py +22 -16
- paddlex/inference/pipelines/formula_recognition/result.py +20 -19
- paddlex/inference/pipelines/image_classification/__init__.py +1 -1
- paddlex/inference/pipelines/image_classification/pipeline.py +17 -8
- paddlex/inference/pipelines/image_multilabel_classification/__init__.py +1 -1
- paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +18 -9
- paddlex/inference/pipelines/instance_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/instance_segmentation/pipeline.py +17 -6
- paddlex/inference/pipelines/keypoint_detection/__init__.py +1 -1
- paddlex/inference/pipelines/keypoint_detection/pipeline.py +17 -6
- paddlex/inference/pipelines/layout_parsing/__init__.py +1 -1
- paddlex/inference/pipelines/layout_parsing/pipeline.py +23 -12
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +16 -6
- paddlex/inference/pipelines/layout_parsing/result.py +5 -4
- paddlex/inference/pipelines/layout_parsing/result_v2.py +5 -8
- paddlex/inference/pipelines/layout_parsing/utils.py +7 -8
- paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/__init__.py +1 -1
- paddlex/inference/pipelines/{3d_bev_detection → m_3d_bev_detection}/pipeline.py +17 -10
- paddlex/inference/pipelines/multilingual_speech_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +17 -6
- paddlex/inference/pipelines/object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ocr/__init__.py +1 -1
- paddlex/inference/pipelines/ocr/pipeline.py +28 -11
- paddlex/inference/pipelines/ocr/result.py +13 -9
- paddlex/inference/pipelines/open_vocabulary_detection/__init__.py +1 -1
- paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +17 -6
- paddlex/inference/pipelines/open_vocabulary_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +17 -6
- paddlex/inference/pipelines/pp_chatocr/__init__.py +1 -1
- paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +14 -5
- paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +22 -11
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +31 -13
- paddlex/inference/pipelines/pp_shitu_v2/__init__.py +1 -1
- paddlex/inference/pipelines/pp_shitu_v2/pipeline.py +12 -8
- paddlex/inference/pipelines/pp_shitu_v2/result.py +4 -4
- paddlex/inference/pipelines/rotated_object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/rotated_object_detection/pipeline.py +17 -6
- paddlex/inference/pipelines/seal_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/seal_recognition/pipeline.py +21 -13
- paddlex/inference/pipelines/seal_recognition/result.py +4 -2
- paddlex/inference/pipelines/semantic_segmentation/__init__.py +1 -1
- paddlex/inference/pipelines/semantic_segmentation/pipeline.py +17 -6
- paddlex/inference/pipelines/small_object_detection/__init__.py +1 -1
- paddlex/inference/pipelines/small_object_detection/pipeline.py +17 -6
- paddlex/inference/pipelines/table_recognition/__init__.py +1 -1
- paddlex/inference/pipelines/table_recognition/pipeline.py +41 -25
- paddlex/inference/pipelines/table_recognition/pipeline_v2.py +65 -33
- paddlex/inference/pipelines/table_recognition/result.py +11 -9
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing.py +12 -8
- paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +46 -32
- paddlex/inference/pipelines/table_recognition/utils.py +1 -1
- paddlex/inference/pipelines/ts_anomaly_detection/__init__.py +1 -1
- paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +16 -6
- paddlex/inference/pipelines/ts_classification/__init__.py +1 -1
- paddlex/inference/pipelines/ts_classification/pipeline.py +16 -6
- paddlex/inference/pipelines/ts_forecasting/__init__.py +1 -1
- paddlex/inference/pipelines/ts_forecasting/pipeline.py +16 -6
- paddlex/inference/pipelines/video_classification/__init__.py +1 -1
- paddlex/inference/pipelines/video_classification/pipeline.py +17 -6
- paddlex/inference/pipelines/video_detection/__init__.py +1 -1
- paddlex/inference/pipelines/video_detection/pipeline.py +20 -7
- paddlex/inference/serving/__init__.py +5 -1
- paddlex/inference/serving/basic_serving/__init__.py +1 -1
- paddlex/inference/serving/basic_serving/_app.py +31 -19
- paddlex/inference/serving/basic_serving/_pipeline_apps/__init__.py +7 -4
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/__init__.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +7 -3
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/image_recognition.py +1 -1
- paddlex/inference/serving/basic_serving/_pipeline_apps/_common/ocr.py +7 -2
- paddlex/inference/serving/basic_serving/_pipeline_apps/anomaly_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_preprocessor.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/doc_understanding.py +153 -0
- paddlex/inference/serving/basic_serving/_pipeline_apps/face_recognition.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/formula_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/human_keypoint_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/image_multilabel_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/instance_segmentation.py +13 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/m_3d_bev_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/multilingual_speech_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ocr.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/open_vocabulary_segmentation.py +13 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/pedestrian_attribute_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +14 -11
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_shituv2.py +16 -13
- paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/rotated_object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/seal_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/semantic_segmentation.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/small_object_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_anomaly_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/ts_forecast.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/vehicle_attribute_recognition.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/video_classification.py +10 -7
- paddlex/inference/serving/basic_serving/_pipeline_apps/video_detection.py +10 -7
- paddlex/inference/serving/basic_serving/_server.py +9 -4
- paddlex/inference/serving/infra/__init__.py +1 -1
- paddlex/inference/serving/infra/config.py +1 -1
- paddlex/inference/serving/infra/models.py +13 -6
- paddlex/inference/serving/infra/storage.py +9 -4
- paddlex/inference/serving/infra/utils.py +37 -9
- paddlex/inference/serving/schemas/__init__.py +1 -1
- paddlex/inference/serving/schemas/anomaly_detection.py +1 -1
- paddlex/inference/serving/schemas/doc_preprocessor.py +1 -1
- paddlex/inference/serving/schemas/doc_understanding.py +78 -0
- paddlex/inference/serving/schemas/face_recognition.py +1 -1
- paddlex/inference/serving/schemas/formula_recognition.py +1 -1
- paddlex/inference/serving/schemas/human_keypoint_detection.py +1 -1
- paddlex/inference/serving/schemas/image_classification.py +1 -1
- paddlex/inference/serving/schemas/image_multilabel_classification.py +1 -1
- paddlex/inference/serving/schemas/instance_segmentation.py +1 -1
- paddlex/inference/serving/schemas/layout_parsing.py +1 -1
- paddlex/inference/serving/schemas/m_3d_bev_detection.py +1 -1
- paddlex/inference/serving/schemas/multilingual_speech_recognition.py +1 -1
- paddlex/inference/serving/schemas/object_detection.py +1 -1
- paddlex/inference/serving/schemas/ocr.py +1 -1
- paddlex/inference/serving/schemas/open_vocabulary_detection.py +1 -1
- paddlex/inference/serving/schemas/open_vocabulary_segmentation.py +1 -1
- paddlex/inference/serving/schemas/pedestrian_attribute_recognition.py +1 -1
- paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -1
- paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +1 -1
- paddlex/inference/serving/schemas/pp_shituv2.py +1 -1
- paddlex/inference/serving/schemas/pp_structurev3.py +1 -1
- paddlex/inference/serving/schemas/rotated_object_detection.py +1 -1
- paddlex/inference/serving/schemas/seal_recognition.py +1 -1
- paddlex/inference/serving/schemas/semantic_segmentation.py +1 -1
- paddlex/inference/serving/schemas/shared/__init__.py +1 -1
- paddlex/inference/serving/schemas/shared/classification.py +1 -1
- paddlex/inference/serving/schemas/shared/image_segmentation.py +1 -1
- paddlex/inference/serving/schemas/shared/object_detection.py +1 -1
- paddlex/inference/serving/schemas/shared/ocr.py +1 -1
- paddlex/inference/serving/schemas/small_object_detection.py +1 -1
- paddlex/inference/serving/schemas/table_recognition.py +1 -1
- paddlex/inference/serving/schemas/table_recognition_v2.py +1 -1
- paddlex/inference/serving/schemas/ts_anomaly_detection.py +1 -1
- paddlex/inference/serving/schemas/ts_classification.py +1 -1
- paddlex/inference/serving/schemas/ts_forecast.py +1 -1
- paddlex/inference/serving/schemas/vehicle_attribute_recognition.py +1 -1
- paddlex/inference/serving/schemas/video_classification.py +1 -1
- paddlex/inference/serving/schemas/video_detection.py +1 -1
- paddlex/inference/utils/__init__.py +1 -1
- paddlex/inference/utils/benchmark.py +332 -179
- paddlex/inference/utils/color_map.py +1 -1
- paddlex/inference/utils/get_pipeline_path.py +1 -1
- paddlex/inference/utils/hpi.py +251 -0
- paddlex/inference/utils/hpi_model_info_collection.json +2252 -0
- paddlex/inference/utils/io/__init__.py +11 -11
- paddlex/inference/utils/io/readers.py +22 -18
- paddlex/inference/utils/io/style.py +21 -14
- paddlex/inference/utils/io/tablepyxl.py +13 -5
- paddlex/inference/utils/io/writers.py +9 -10
- paddlex/inference/utils/model_paths.py +48 -0
- paddlex/inference/utils/{new_ir_blacklist.py → new_ir_blocklist.py} +1 -2
- paddlex/inference/utils/official_models.py +264 -262
- paddlex/inference/utils/pp_option.py +164 -93
- paddlex/inference/utils/trt_blocklist.py +43 -0
- paddlex/inference/utils/trt_config.py +420 -0
- paddlex/model.py +28 -10
- paddlex/modules/__init__.py +57 -80
- paddlex/modules/anomaly_detection/__init__.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/__init__.py +2 -3
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +6 -3
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/check_dataset.py +8 -4
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +7 -4
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/split_dataset.py +2 -2
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/anomaly_detection/dataset_checker/dataset_src/utils/visualizer.py +7 -2
- paddlex/modules/anomaly_detection/evaluator.py +1 -1
- paddlex/modules/anomaly_detection/exportor.py +1 -1
- paddlex/modules/anomaly_detection/model_list.py +1 -1
- paddlex/modules/anomaly_detection/trainer.py +3 -4
- paddlex/modules/base/__init__.py +5 -5
- paddlex/modules/base/build_model.py +1 -2
- paddlex/modules/base/dataset_checker/__init__.py +2 -2
- paddlex/modules/base/dataset_checker/dataset_checker.py +4 -4
- paddlex/modules/base/dataset_checker/utils.py +1 -3
- paddlex/modules/base/evaluator.py +8 -8
- paddlex/modules/base/exportor.py +12 -13
- paddlex/modules/base/trainer.py +21 -11
- paddlex/modules/base/utils/__init__.py +13 -0
- paddlex/modules/base/utils/cinn_setting.py +89 -0
- paddlex/modules/base/utils/coco_eval.py +94 -0
- paddlex/modules/base/utils/topk_eval.py +118 -0
- paddlex/modules/doc_vlm/__init__.py +18 -0
- paddlex/modules/doc_vlm/dataset_checker.py +29 -0
- paddlex/modules/doc_vlm/evaluator.py +29 -0
- paddlex/modules/doc_vlm/exportor.py +29 -0
- paddlex/modules/doc_vlm/model_list.py +16 -0
- paddlex/modules/doc_vlm/trainer.py +41 -0
- paddlex/modules/face_recognition/__init__.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/__init__.py +2 -2
- paddlex/modules/face_recognition/dataset_checker/dataset_src/__init__.py +1 -1
- paddlex/modules/face_recognition/dataset_checker/dataset_src/check_dataset.py +3 -5
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/face_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/face_recognition/evaluator.py +1 -1
- paddlex/modules/face_recognition/exportor.py +1 -1
- paddlex/modules/face_recognition/model_list.py +1 -1
- paddlex/modules/face_recognition/trainer.py +1 -1
- paddlex/modules/formula_recognition/__init__.py +2 -2
- paddlex/modules/formula_recognition/dataset_checker/__init__.py +3 -3
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/check_dataset.py +2 -6
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
- paddlex/modules/formula_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
- paddlex/modules/formula_recognition/evaluator.py +1 -1
- paddlex/modules/formula_recognition/exportor.py +1 -1
- paddlex/modules/formula_recognition/model_list.py +1 -1
- paddlex/modules/formula_recognition/trainer.py +2 -3
- paddlex/modules/general_recognition/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/general_recognition/dataset_checker/dataset_src/analyse_dataset.py +7 -9
- paddlex/modules/general_recognition/dataset_checker/dataset_src/check_dataset.py +4 -5
- paddlex/modules/general_recognition/dataset_checker/dataset_src/convert_dataset.py +6 -5
- paddlex/modules/general_recognition/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/general_recognition/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/general_recognition/evaluator.py +1 -1
- paddlex/modules/general_recognition/exportor.py +1 -1
- paddlex/modules/general_recognition/model_list.py +1 -1
- paddlex/modules/general_recognition/trainer.py +1 -1
- paddlex/modules/image_classification/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/image_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/image_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
- paddlex/modules/image_classification/dataset_checker/dataset_src/convert_dataset.py +4 -4
- paddlex/modules/image_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/image_classification/dataset_checker/dataset_src/utils/visualizer.py +2 -5
- paddlex/modules/image_classification/evaluator.py +1 -1
- paddlex/modules/image_classification/exportor.py +1 -1
- paddlex/modules/image_classification/model_list.py +1 -1
- paddlex/modules/image_classification/trainer.py +3 -3
- paddlex/modules/image_unwarping/__init__.py +1 -1
- paddlex/modules/image_unwarping/model_list.py +1 -1
- paddlex/modules/instance_segmentation/__init__.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/__init__.py +2 -3
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/analyse_dataset.py +9 -5
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/check_dataset.py +8 -5
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/convert_dataset.py +8 -8
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/split_dataset.py +7 -4
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/instance_segmentation/dataset_checker/dataset_src/utils/visualizer.py +10 -8
- paddlex/modules/instance_segmentation/evaluator.py +1 -1
- paddlex/modules/instance_segmentation/exportor.py +1 -1
- paddlex/modules/instance_segmentation/model_list.py +1 -1
- paddlex/modules/instance_segmentation/trainer.py +1 -1
- paddlex/modules/keypoint_detection/__init__.py +2 -2
- paddlex/modules/keypoint_detection/dataset_checker/__init__.py +2 -2
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/__init__.py +1 -1
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/keypoint_detection/dataset_checker/dataset_src/utils/visualizer.py +8 -3
- paddlex/modules/keypoint_detection/evaluator.py +1 -1
- paddlex/modules/keypoint_detection/exportor.py +1 -1
- paddlex/modules/keypoint_detection/model_list.py +1 -1
- paddlex/modules/keypoint_detection/trainer.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/__init__.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/__init__.py +3 -3
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/analyse_dataset.py +8 -8
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/dataset_checker/dataset_src/check_dataset.py +1 -2
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/evaluator.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/exportor.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/model_list.py +1 -1
- paddlex/modules/{3d_bev_detection → m_3d_bev_detection}/trainer.py +5 -7
- paddlex/modules/multilabel_classification/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/check_dataset.py +4 -3
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/convert_dataset.py +10 -7
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/multilabel_classification/dataset_checker/dataset_src/utils/visualizer.py +1 -5
- paddlex/modules/multilabel_classification/evaluator.py +1 -1
- paddlex/modules/multilabel_classification/exportor.py +1 -1
- paddlex/modules/multilabel_classification/model_list.py +1 -1
- paddlex/modules/multilabel_classification/trainer.py +3 -3
- paddlex/modules/multilingual_speech_recognition/__init__.py +2 -2
- paddlex/modules/multilingual_speech_recognition/dataset_checker.py +3 -3
- paddlex/modules/multilingual_speech_recognition/evaluator.py +3 -3
- paddlex/modules/multilingual_speech_recognition/exportor.py +3 -3
- paddlex/modules/multilingual_speech_recognition/model_list.py +1 -1
- paddlex/modules/multilingual_speech_recognition/trainer.py +7 -5
- paddlex/modules/object_detection/__init__.py +2 -2
- paddlex/modules/object_detection/dataset_checker/__init__.py +2 -11
- paddlex/modules/object_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/object_detection/dataset_checker/dataset_src/analyse_dataset.py +10 -8
- paddlex/modules/object_detection/dataset_checker/dataset_src/check_dataset.py +10 -5
- paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +13 -8
- paddlex/modules/object_detection/dataset_checker/dataset_src/split_dataset.py +8 -4
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/object_detection/dataset_checker/dataset_src/utils/visualizer.py +9 -8
- paddlex/modules/object_detection/evaluator.py +9 -4
- paddlex/modules/object_detection/exportor.py +1 -1
- paddlex/modules/object_detection/model_list.py +1 -1
- paddlex/modules/object_detection/trainer.py +4 -5
- paddlex/modules/open_vocabulary_detection/__init__.py +2 -2
- paddlex/modules/open_vocabulary_detection/dataset_checker.py +3 -3
- paddlex/modules/open_vocabulary_detection/evaluator.py +3 -3
- paddlex/modules/open_vocabulary_detection/exportor.py +3 -3
- paddlex/modules/open_vocabulary_detection/model_list.py +2 -4
- paddlex/modules/open_vocabulary_detection/trainer.py +7 -5
- paddlex/modules/open_vocabulary_segmentation/__init__.py +2 -2
- paddlex/modules/open_vocabulary_segmentation/dataset_checker.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/evaluator.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/exportor.py +3 -3
- paddlex/modules/open_vocabulary_segmentation/model_list.py +1 -1
- paddlex/modules/open_vocabulary_segmentation/trainer.py +7 -5
- paddlex/modules/semantic_segmentation/__init__.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +2 -3
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/analyse_dataset.py +6 -3
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/convert_dataset.py +7 -4
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/split_dataset.py +2 -2
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/__init__.py +1 -1
- paddlex/modules/semantic_segmentation/dataset_checker/dataset_src/utils/visualizer.py +6 -2
- paddlex/modules/semantic_segmentation/evaluator.py +1 -1
- paddlex/modules/semantic_segmentation/exportor.py +1 -1
- paddlex/modules/semantic_segmentation/model_list.py +1 -1
- paddlex/modules/semantic_segmentation/trainer.py +3 -4
- paddlex/modules/table_recognition/__init__.py +2 -2
- paddlex/modules/table_recognition/dataset_checker/__init__.py +5 -5
- paddlex/modules/table_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/table_recognition/dataset_checker/dataset_src/analyse_dataset.py +3 -2
- paddlex/modules/table_recognition/dataset_checker/dataset_src/check_dataset.py +8 -7
- paddlex/modules/table_recognition/dataset_checker/dataset_src/split_dataset.py +2 -1
- paddlex/modules/table_recognition/evaluator.py +1 -1
- paddlex/modules/table_recognition/exportor.py +1 -1
- paddlex/modules/table_recognition/model_list.py +1 -1
- paddlex/modules/table_recognition/trainer.py +2 -5
- paddlex/modules/text_detection/__init__.py +2 -2
- paddlex/modules/text_detection/dataset_checker/__init__.py +4 -6
- paddlex/modules/text_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/text_detection/dataset_checker/dataset_src/analyse_dataset.py +12 -9
- paddlex/modules/text_detection/dataset_checker/dataset_src/check_dataset.py +3 -3
- paddlex/modules/text_detection/dataset_checker/dataset_src/split_dataset.py +3 -3
- paddlex/modules/text_detection/evaluator.py +1 -1
- paddlex/modules/text_detection/exportor.py +1 -1
- paddlex/modules/text_detection/model_list.py +1 -1
- paddlex/modules/text_detection/trainer.py +2 -5
- paddlex/modules/text_recognition/__init__.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/__init__.py +4 -5
- paddlex/modules/text_recognition/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/text_recognition/dataset_checker/dataset_src/analyse_dataset.py +13 -12
- paddlex/modules/text_recognition/dataset_checker/dataset_src/check_dataset.py +2 -5
- paddlex/modules/text_recognition/dataset_checker/dataset_src/convert_dataset.py +11 -10
- paddlex/modules/text_recognition/dataset_checker/dataset_src/split_dataset.py +1 -2
- paddlex/modules/text_recognition/evaluator.py +1 -1
- paddlex/modules/text_recognition/exportor.py +1 -1
- paddlex/modules/text_recognition/model_list.py +1 -1
- paddlex/modules/text_recognition/trainer.py +2 -3
- paddlex/modules/ts_anomaly_detection/__init__.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/analyse_dataset.py +1 -9
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_anomaly_detection/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_anomaly_detection/evaluator.py +1 -1
- paddlex/modules/ts_anomaly_detection/exportor.py +2 -3
- paddlex/modules/ts_anomaly_detection/model_list.py +1 -1
- paddlex/modules/ts_anomaly_detection/trainer.py +8 -8
- paddlex/modules/ts_classification/__init__.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/analyse_dataset.py +8 -5
- paddlex/modules/ts_classification/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_classification/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_classification/evaluator.py +1 -1
- paddlex/modules/ts_classification/exportor.py +2 -3
- paddlex/modules/ts_classification/model_list.py +1 -1
- paddlex/modules/ts_classification/trainer.py +7 -7
- paddlex/modules/ts_forecast/__init__.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/__init__.py +4 -5
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/analyse_dataset.py +1 -9
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/check_dataset.py +2 -2
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/convert_dataset.py +2 -6
- paddlex/modules/ts_forecast/dataset_checker/dataset_src/split_dataset.py +4 -4
- paddlex/modules/ts_forecast/evaluator.py +1 -1
- paddlex/modules/ts_forecast/exportor.py +2 -3
- paddlex/modules/ts_forecast/model_list.py +1 -1
- paddlex/modules/ts_forecast/trainer.py +7 -7
- paddlex/modules/video_classification/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/video_classification/dataset_checker/dataset_src/analyse_dataset.py +9 -9
- paddlex/modules/video_classification/dataset_checker/dataset_src/check_dataset.py +2 -3
- paddlex/modules/video_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
- paddlex/modules/video_classification/evaluator.py +1 -1
- paddlex/modules/video_classification/exportor.py +1 -1
- paddlex/modules/video_classification/model_list.py +1 -1
- paddlex/modules/video_classification/trainer.py +3 -3
- paddlex/modules/video_detection/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/__init__.py +2 -2
- paddlex/modules/video_detection/dataset_checker/dataset_src/analyse_dataset.py +8 -9
- paddlex/modules/video_detection/dataset_checker/dataset_src/check_dataset.py +3 -5
- paddlex/modules/video_detection/evaluator.py +1 -1
- paddlex/modules/video_detection/exportor.py +1 -1
- paddlex/modules/video_detection/model_list.py +1 -1
- paddlex/modules/video_detection/trainer.py +3 -3
- paddlex/ops/__init__.py +5 -2
- paddlex/ops/iou3d_nms/iou3d_cpu.cpp +8 -6
- paddlex/ops/iou3d_nms/iou3d_cpu.h +3 -2
- paddlex/ops/iou3d_nms/iou3d_nms.cpp +8 -6
- paddlex/ops/iou3d_nms/iou3d_nms.h +6 -4
- paddlex/ops/iou3d_nms/iou3d_nms_api.cpp +24 -18
- paddlex/ops/iou3d_nms/iou3d_nms_kernel.cu +9 -7
- paddlex/ops/setup.py +3 -3
- paddlex/ops/voxel/voxelize_op.cc +22 -19
- paddlex/ops/voxel/voxelize_op.cu +25 -25
- paddlex/paddlex_cli.py +86 -75
- paddlex/repo_apis/Paddle3D_api/__init__.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/__init__.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/config.py +1 -1
- paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +4 -4
- paddlex/repo_apis/Paddle3D_api/bev_fusion/register.py +2 -2
- paddlex/repo_apis/Paddle3D_api/bev_fusion/runner.py +1 -1
- paddlex/repo_apis/Paddle3D_api/pp3d_config.py +3 -2
- paddlex/repo_apis/PaddleClas_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleClas_api/cls/__init__.py +3 -3
- paddlex/repo_apis/PaddleClas_api/cls/config.py +4 -3
- paddlex/repo_apis/PaddleClas_api/cls/model.py +3 -3
- paddlex/repo_apis/PaddleClas_api/cls/register.py +2 -3
- paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/__init__.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/config.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/model.py +1 -4
- paddlex/repo_apis/PaddleClas_api/shitu_rec/register.py +2 -2
- paddlex/repo_apis/PaddleClas_api/shitu_rec/runner.py +1 -6
- paddlex/repo_apis/PaddleDetection_api/__init__.py +2 -2
- paddlex/repo_apis/PaddleDetection_api/config_helper.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/__init__.py +2 -2
- paddlex/repo_apis/PaddleDetection_api/instance_seg/config.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/register.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -2
- paddlex/repo_apis/PaddleDetection_api/object_det/__init__.py +3 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/config.py +4 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/model.py +5 -6
- paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +1 -1
- paddlex/repo_apis/PaddleDetection_api/object_det/register.py +2 -3
- paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -2
- paddlex/repo_apis/PaddleNLP_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/__init__.py +4 -3
- paddlex/repo_apis/PaddleOCR_api/config_utils.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +4 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +4 -4
- paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -2
- paddlex/repo_apis/PaddleOCR_api/table_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/config.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/table_rec/register.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +2 -2
- paddlex/repo_apis/PaddleOCR_api/text_det/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/config.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_det/model.py +3 -3
- paddlex/repo_apis/PaddleOCR_api/text_det/register.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +2 -2
- paddlex/repo_apis/PaddleOCR_api/text_rec/__init__.py +1 -1
- paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +4 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +4 -4
- paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +2 -3
- paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -2
- paddlex/repo_apis/PaddleSeg_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/base_seg_config.py +2 -2
- paddlex/repo_apis/PaddleSeg_api/seg/__init__.py +1 -1
- paddlex/repo_apis/PaddleSeg_api/seg/config.py +3 -6
- paddlex/repo_apis/PaddleSeg_api/seg/model.py +5 -5
- paddlex/repo_apis/PaddleSeg_api/seg/register.py +2 -3
- paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -2
- paddlex/repo_apis/PaddleTS_api/__init__.py +4 -3
- paddlex/repo_apis/PaddleTS_api/ts_ad/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +2 -3
- paddlex/repo_apis/PaddleTS_api/ts_ad/register.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_ad/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_base/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_base/config.py +2 -4
- paddlex/repo_apis/PaddleTS_api/ts_base/model.py +4 -4
- paddlex/repo_apis/PaddleTS_api/ts_base/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_cls/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -3
- paddlex/repo_apis/PaddleTS_api/ts_cls/register.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_cls/runner.py +2 -2
- paddlex/repo_apis/PaddleTS_api/ts_fc/__init__.py +1 -1
- paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +2 -3
- paddlex/repo_apis/PaddleTS_api/ts_fc/register.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/__init__.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/config_utils.py +1 -1
- paddlex/repo_apis/PaddleVideo_api/video_cls/__init__.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +4 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/register.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -2
- paddlex/repo_apis/PaddleVideo_api/video_det/__init__.py +3 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/config.py +4 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/model.py +4 -4
- paddlex/repo_apis/PaddleVideo_api/video_det/register.py +2 -3
- paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -2
- paddlex/repo_apis/__init__.py +1 -1
- paddlex/repo_apis/base/__init__.py +4 -5
- paddlex/repo_apis/base/config.py +2 -3
- paddlex/repo_apis/base/model.py +11 -19
- paddlex/repo_apis/base/register.py +1 -1
- paddlex/repo_apis/base/runner.py +11 -12
- paddlex/repo_apis/base/utils/__init__.py +1 -1
- paddlex/repo_apis/base/utils/arg.py +1 -1
- paddlex/repo_apis/base/utils/subprocess.py +1 -1
- paddlex/repo_manager/__init__.py +2 -9
- paddlex/repo_manager/core.py +9 -27
- paddlex/repo_manager/meta.py +37 -31
- paddlex/repo_manager/repo.py +169 -160
- paddlex/repo_manager/utils.py +13 -224
- paddlex/utils/__init__.py +1 -1
- paddlex/utils/cache.py +8 -10
- paddlex/utils/config.py +6 -5
- paddlex/utils/{custom_device_whitelist.py → custom_device_list.py} +29 -199
- paddlex/utils/deps.py +249 -0
- paddlex/utils/device.py +73 -29
- paddlex/utils/download.py +4 -4
- paddlex/utils/env.py +33 -7
- paddlex/utils/errors/__init__.py +1 -1
- paddlex/utils/errors/dataset_checker.py +1 -1
- paddlex/utils/errors/others.py +2 -16
- paddlex/utils/file_interface.py +4 -5
- paddlex/utils/flags.py +19 -12
- paddlex/utils/fonts/__init__.py +2 -1
- paddlex/utils/func_register.py +1 -1
- paddlex/utils/install.py +87 -0
- paddlex/utils/interactive_get_pipeline.py +3 -3
- paddlex/utils/lazy_loader.py +3 -3
- paddlex/utils/logging.py +10 -1
- paddlex/utils/misc.py +5 -5
- paddlex/utils/pipeline_arguments.py +15 -7
- paddlex/utils/result_saver.py +4 -5
- paddlex/utils/subclass_register.py +2 -4
- paddlex/version.py +2 -1
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.0rc1.dist-info}/METADATA +212 -73
- paddlex-3.0.0rc1.dist-info/RECORD +1068 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.0rc1.dist-info}/WHEEL +1 -1
- paddlex/inference/models/base/predictor/basic_predictor.py +0 -139
- paddlex/paddle2onnx_requirements.txt +0 -1
- paddlex/repo_manager/requirements.txt +0 -21
- paddlex/serving_requirements.txt +0 -9
- paddlex-3.0.0rc0.dist-info/RECORD +0 -1015
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.0rc1.dist-info}/entry_points.txt +0 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.0rc1.dist-info/licenses}/LICENSE +0 -0
- {paddlex-3.0.0rc0.dist-info → paddlex-3.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -13,33 +13,25 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
import bisect
|
16
|
+
import functools
|
17
|
+
import inspect
|
16
18
|
import io
|
17
19
|
import itertools
|
18
20
|
import json
|
19
21
|
import os
|
20
22
|
import re
|
21
|
-
import six
|
22
|
-
import inspect
|
23
23
|
import unicodedata
|
24
|
-
import functools
|
25
24
|
from collections import OrderedDict
|
26
25
|
from dataclasses import asdict, dataclass
|
27
|
-
from
|
26
|
+
from functools import lru_cache
|
27
|
+
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
|
28
28
|
|
29
|
-
import numpy
|
30
29
|
import numpy as np
|
31
|
-
import lazy_paddle as paddle
|
32
|
-
from jinja2 import Template
|
33
|
-
from jinja2.exceptions import TemplateError, TemplateSyntaxError
|
34
|
-
from jinja2.sandbox import ImmutableSandboxedEnvironment
|
35
30
|
|
36
|
-
from .tokenizer_utils_base import CHAT_TEMPLATE_CONFIG_NAME
|
37
31
|
from .....utils import logging
|
38
|
-
|
39
|
-
from functools import lru_cache
|
40
|
-
|
41
|
-
from .vocab import Vocab
|
32
|
+
from .....utils.deps import class_requires_deps, is_dep_available
|
42
33
|
from .tokenizer_utils_base import (
|
34
|
+
CHAT_TEMPLATE_CONFIG_NAME,
|
43
35
|
AddedToken,
|
44
36
|
BatchEncoding,
|
45
37
|
EncodedInput,
|
@@ -54,6 +46,12 @@ from .tokenizer_utils_base import (
|
|
54
46
|
TruncationStrategy,
|
55
47
|
)
|
56
48
|
from .utils import convert_to_dict_message, fn_args_to_dict
|
49
|
+
from .vocab import Vocab
|
50
|
+
|
51
|
+
if is_dep_available("Jinja2"):
|
52
|
+
from jinja2 import Template
|
53
|
+
from jinja2.exceptions import TemplateError, TemplateSyntaxError
|
54
|
+
from jinja2.sandbox import ImmutableSandboxedEnvironment
|
57
55
|
|
58
56
|
__all__ = [
|
59
57
|
"ChatTemplate",
|
@@ -64,6 +62,7 @@ __all__ = [
|
|
64
62
|
]
|
65
63
|
|
66
64
|
|
65
|
+
@class_requires_deps("Jinja2")
|
67
66
|
@dataclass
|
68
67
|
class ChatTemplate:
|
69
68
|
conversation: Union[List[str], None] = None
|
@@ -72,7 +71,7 @@ class ChatTemplate:
|
|
72
71
|
|
73
72
|
@staticmethod
|
74
73
|
@lru_cache()
|
75
|
-
def _compile_jinja_template(chat_template) -> Template:
|
74
|
+
def _compile_jinja_template(chat_template) -> "Template":
|
76
75
|
def raise_exception(message):
|
77
76
|
raise TemplateError(message)
|
78
77
|
|
@@ -204,6 +203,7 @@ def adapt_stale_fwd_patch(self, name, value):
|
|
204
203
|
model compression, we make these patches compatible with the latest forward
|
205
204
|
method.
|
206
205
|
"""
|
206
|
+
|
207
207
|
if name == "forward":
|
208
208
|
# NOTE(guosheng): In dygraph to static, `layer.forward` would be patched
|
209
209
|
# by an instance of `StaticFunction`. And use string compare to avoid to
|
@@ -231,6 +231,8 @@ def adapt_stale_fwd_patch(self, name, value):
|
|
231
231
|
]
|
232
232
|
|
233
233
|
if new_args:
|
234
|
+
import paddle
|
235
|
+
|
234
236
|
if self.__module__.startswith("paddlenlp"):
|
235
237
|
logging.warning(
|
236
238
|
f"The `forward` method of {self.__class__ if isinstance(self, paddle.nn.Layer) else self} is patched and the patch "
|
@@ -641,20 +643,20 @@ class ChatTemplateMixin:
|
|
641
643
|
|
642
644
|
def apply_chat_template(
|
643
645
|
self,
|
644
|
-
conversation: Union[Dict[str, str], str],
|
646
|
+
conversation: Union[List[List[str]], Dict[str, str], str],
|
645
647
|
tokenize: bool = True,
|
646
648
|
context_data: Dict[str, Any] = {},
|
647
649
|
**tokenizer_kwargs,
|
648
|
-
)
|
650
|
+
):
|
649
651
|
"""apply chat_template rules to conversation which should not be batched data
|
650
652
|
|
651
653
|
Args:
|
652
|
-
conversation (List[List[str
|
654
|
+
conversation (List[List[str]] , str): the conversation messages between user and bot
|
653
655
|
context_data (Dict[str, Any]): the context data for chat_template.json
|
654
656
|
tokenize (bool, optional): whether do tokenization. Defaults to True.
|
655
657
|
|
656
658
|
Returns:
|
657
|
-
str | dict[str, Union[
|
659
|
+
str | dict[str, Union[numpy.ndarray, paddle.Tensor]]: return the result of applied data
|
658
660
|
"""
|
659
661
|
if not self.chat_template:
|
660
662
|
raise ValueError(
|
@@ -677,9 +679,9 @@ class ChatTemplateMixin:
|
|
677
679
|
|
678
680
|
def _apply_chat_template_paddle(
|
679
681
|
self,
|
680
|
-
conversation: Union[List[
|
682
|
+
conversation: Union[List[List[str]], str],
|
681
683
|
context_data: Dict[str, Any] = {},
|
682
|
-
)
|
684
|
+
):
|
683
685
|
context_data = self.chat_template._init_context_data(context_data)
|
684
686
|
|
685
687
|
if isinstance(conversation, str):
|
@@ -695,9 +697,9 @@ class ChatTemplateMixin:
|
|
695
697
|
|
696
698
|
def _apply_chat_template(
|
697
699
|
self,
|
698
|
-
conversation: Union[Dict[str, str], str],
|
700
|
+
conversation: Union[List[List[str]], Dict[str, str], str],
|
699
701
|
add_generation_prompt=True,
|
700
|
-
)
|
702
|
+
):
|
701
703
|
if isinstance(conversation, str):
|
702
704
|
conversations = [{"role": "user", "content": conversation}]
|
703
705
|
elif isinstance(conversation, list):
|
@@ -720,7 +722,7 @@ class ChatTemplateMixin:
|
|
720
722
|
|
721
723
|
def encode_chat_inputs(
|
722
724
|
self,
|
723
|
-
conversations: List[
|
725
|
+
conversations: List[List[str]],
|
724
726
|
context_data: Dict[str, Any] = {},
|
725
727
|
**kwargs,
|
726
728
|
):
|
@@ -729,7 +731,7 @@ class ChatTemplateMixin:
|
|
729
731
|
Turn t: sep + bot + query bot + eos
|
730
732
|
|
731
733
|
Args:
|
732
|
-
conversation (List[
|
734
|
+
conversation (List[List[str]]): the conversation of data
|
733
735
|
context_data (Dict[str, Any]): the context data of conversation
|
734
736
|
|
735
737
|
Returns:
|
@@ -749,7 +751,7 @@ class ChatTemplateMixin:
|
|
749
751
|
return query
|
750
752
|
|
751
753
|
def _encode_chat_inputs_paddle(
|
752
|
-
self, conversations: List[
|
754
|
+
self, conversations: List[List[str]], context_data: Dict[str, Any] = {}
|
753
755
|
):
|
754
756
|
context_data = self.chat_template._init_context_data(context_data)
|
755
757
|
# encode system
|
@@ -779,7 +781,7 @@ class ChatTemplateMixin:
|
|
779
781
|
|
780
782
|
def _encode_chat_inputs(
|
781
783
|
self,
|
782
|
-
conversations: List[
|
784
|
+
conversations: List[List[str]],
|
783
785
|
context_data: Dict[str, Any] = {},
|
784
786
|
system: str = None,
|
785
787
|
add_generation_prompt=True,
|
@@ -824,7 +826,9 @@ class ChatTemplateMixin:
|
|
824
826
|
ans.append(ans_roundi)
|
825
827
|
|
826
828
|
non_learnable_parts = self._extract_non_learnable_parts(origin_msg, ans)
|
827
|
-
assert len(non_learnable_parts) == len(
|
829
|
+
assert len(non_learnable_parts) == len(
|
830
|
+
ans
|
831
|
+
), f"Get non_learnable_parts len: {len(non_learnable_parts)}, but ans len: {len(ans)}."
|
828
832
|
|
829
833
|
conversation_ids = []
|
830
834
|
for i in range(len(non_learnable_parts)):
|
@@ -893,11 +897,11 @@ class ChatTemplateMixin:
|
|
893
897
|
tokenizer.init_chat_template(chat_template_file)
|
894
898
|
return tokenizer
|
895
899
|
|
896
|
-
def init_chat_template(self, chat_template: Union[str,
|
900
|
+
def init_chat_template(self, chat_template: Union[str, dict]):
|
897
901
|
"""init chat_tempalte by file_path or template dict data
|
898
902
|
|
899
903
|
Args:
|
900
|
-
chat_template (str
|
904
|
+
chat_template (str, dict): file_path or template dict data
|
901
905
|
"""
|
902
906
|
if isinstance(chat_template, str):
|
903
907
|
if not os.path.exists(chat_template):
|
@@ -934,8 +938,9 @@ class ChatTemplateMixin:
|
|
934
938
|
logging.info("Chat-template config file saved in " + chat_template_file)
|
935
939
|
|
936
940
|
|
937
|
-
|
938
|
-
|
941
|
+
class PretrainedTokenizer(
|
942
|
+
ChatTemplateMixin, PretrainedTokenizerBase, metaclass=InitTrackerMeta
|
943
|
+
):
|
939
944
|
"""
|
940
945
|
Base class for all tokenizers.
|
941
946
|
|
@@ -992,8 +997,12 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
992
997
|
init_dict.pop("self", None)
|
993
998
|
super(PretrainedTokenizer, self).__init__(**init_dict)
|
994
999
|
|
995
|
-
self.
|
996
|
-
self.added_tokens_decoder
|
1000
|
+
self.added_tokens_decoder: Dict[int, AddedToken] = {}
|
1001
|
+
self.added_tokens_decoder.update(kwargs.pop("added_tokens_decoder", {}))
|
1002
|
+
self.added_tokens_encoder: Dict[str, int] = {
|
1003
|
+
k.content: v for v, k in self.added_tokens_decoder.items()
|
1004
|
+
}
|
1005
|
+
|
997
1006
|
self.unique_no_split_tokens: List[str] = []
|
998
1007
|
self.tokens_trie = Trie()
|
999
1008
|
|
@@ -1091,6 +1100,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1091
1100
|
and self.convert_tokens_to_ids(token)
|
1092
1101
|
== self.convert_tokens_to_ids(self.unk_token)
|
1093
1102
|
and token not in tokens_to_add
|
1103
|
+
and token not in self.added_tokens_encoder.keys()
|
1094
1104
|
):
|
1095
1105
|
tokens_to_add.append(token)
|
1096
1106
|
if self.verbose:
|
@@ -1179,6 +1189,11 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1179
1189
|
Returns:
|
1180
1190
|
`List[str]`: The list of tokens.
|
1181
1191
|
"""
|
1192
|
+
|
1193
|
+
split_special_tokens = kwargs.pop(
|
1194
|
+
"split_special_tokens", self.split_special_tokens
|
1195
|
+
)
|
1196
|
+
|
1182
1197
|
# Simple mapping string => AddedToken for special tokens with specific tokenization behaviors
|
1183
1198
|
all_special_tokens_extended = dict(
|
1184
1199
|
(str(t), t)
|
@@ -1200,8 +1215,15 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1200
1215
|
pattern, lambda m: m.groups()[0] or m.groups()[1].lower(), text
|
1201
1216
|
)
|
1202
1217
|
|
1203
|
-
|
1204
|
-
|
1218
|
+
if split_special_tokens:
|
1219
|
+
no_split_token = []
|
1220
|
+
tokens = [text]
|
1221
|
+
else:
|
1222
|
+
no_split_token = set(
|
1223
|
+
self.unique_no_split_tokens
|
1224
|
+
) # don't split on any of the added tokens
|
1225
|
+
# "This is something<special_token_1> else"
|
1226
|
+
tokens = self.tokens_trie.split(text)
|
1205
1227
|
|
1206
1228
|
# ["This is something", "<special_token_1>", " else"]
|
1207
1229
|
for i, token in enumerate(tokens):
|
@@ -1286,7 +1308,9 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1286
1308
|
def convert_ids_to_tokens(self, ids, skip_special_tokens=False):
|
1287
1309
|
if isinstance(ids, int):
|
1288
1310
|
if ids in self.added_tokens_decoder:
|
1289
|
-
|
1311
|
+
token = self.added_tokens_decoder[ids]
|
1312
|
+
token = token.content if isinstance(token, AddedToken) else token
|
1313
|
+
return token
|
1290
1314
|
else:
|
1291
1315
|
return self._convert_id_to_token(ids)
|
1292
1316
|
tokens = []
|
@@ -1295,7 +1319,9 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1295
1319
|
if skip_special_tokens and index in self.all_special_ids:
|
1296
1320
|
continue
|
1297
1321
|
if index in self.added_tokens_decoder:
|
1298
|
-
|
1322
|
+
token = self.added_tokens_decoder[index]
|
1323
|
+
token = token.content if isinstance(token, AddedToken) else token
|
1324
|
+
tokens.append(token)
|
1299
1325
|
else:
|
1300
1326
|
tokens.append(self._convert_id_to_token(index))
|
1301
1327
|
return tokens
|
@@ -1427,6 +1453,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1427
1453
|
stride: int = 0,
|
1428
1454
|
is_split_into_words: bool = False,
|
1429
1455
|
pad_to_multiple_of: Optional[int] = None,
|
1456
|
+
padding_side: Optional[Literal["right", "left"]] = None,
|
1430
1457
|
return_tensors: Optional[Union[str, TensorType]] = None,
|
1431
1458
|
return_position_ids: Optional[bool] = None,
|
1432
1459
|
return_token_type_ids: Optional[bool] = None,
|
@@ -1491,6 +1518,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1491
1518
|
max_length=max_length,
|
1492
1519
|
stride=stride,
|
1493
1520
|
pad_to_multiple_of=pad_to_multiple_of,
|
1521
|
+
padding_side=padding_side,
|
1494
1522
|
return_tensors=return_tensors,
|
1495
1523
|
prepend_batch_axis=True,
|
1496
1524
|
return_position_ids=return_position_ids,
|
@@ -1521,6 +1549,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1521
1549
|
stride: int = 0,
|
1522
1550
|
is_split_into_words: bool = False,
|
1523
1551
|
pad_to_multiple_of: Optional[int] = None,
|
1552
|
+
padding_side: Optional[Literal["right", "left"]] = None,
|
1524
1553
|
return_position_ids: Optional[bool] = None,
|
1525
1554
|
return_tensors: Optional[Union[str, TensorType]] = None,
|
1526
1555
|
return_token_type_ids: Optional[bool] = None,
|
@@ -1606,6 +1635,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1606
1635
|
max_length=max_length,
|
1607
1636
|
stride=stride,
|
1608
1637
|
pad_to_multiple_of=pad_to_multiple_of,
|
1638
|
+
padding_side=padding_side,
|
1609
1639
|
return_position_ids=return_position_ids,
|
1610
1640
|
return_attention_mask=return_attention_mask,
|
1611
1641
|
return_token_type_ids=return_token_type_ids,
|
@@ -1630,6 +1660,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1630
1660
|
max_length: Optional[int] = None,
|
1631
1661
|
stride: int = 0,
|
1632
1662
|
pad_to_multiple_of: Optional[int] = None,
|
1663
|
+
padding_side: Optional[Literal["right", "left"]] = None,
|
1633
1664
|
return_position_ids: Optional[bool] = None,
|
1634
1665
|
return_tensors: Optional[str] = None,
|
1635
1666
|
return_token_type_ids: Optional[bool] = None,
|
@@ -1758,6 +1789,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1758
1789
|
max_length=max_length,
|
1759
1790
|
stride=stride,
|
1760
1791
|
pad_to_multiple_of=None, # we pad in batch afterward
|
1792
|
+
padding_side=padding_side, # we pad in batch afterward
|
1761
1793
|
return_position_ids=return_position_ids, # we pad in batch afterward
|
1762
1794
|
return_attention_mask=False, # we pad in batch afterward
|
1763
1795
|
return_token_type_ids=return_token_type_ids,
|
@@ -1780,6 +1812,7 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
1780
1812
|
padding=padding_strategy.value,
|
1781
1813
|
max_length=max_length,
|
1782
1814
|
pad_to_multiple_of=pad_to_multiple_of,
|
1815
|
+
padding_side=padding_side,
|
1783
1816
|
return_attention_mask=return_attention_mask,
|
1784
1817
|
)
|
1785
1818
|
if return_dict:
|
@@ -2028,31 +2061,6 @@ class PretrainedTokenizer(ChatTemplateMixin, PretrainedTokenizerBase):
|
|
2028
2061
|
else:
|
2029
2062
|
return text
|
2030
2063
|
|
2031
|
-
def decode_token(
|
2032
|
-
self,
|
2033
|
-
all_input_ids: List[int],
|
2034
|
-
prefix_offset: int = 0,
|
2035
|
-
read_offset: int = 0,
|
2036
|
-
) -> Tuple[str, int, int]:
|
2037
|
-
"""tokenizer decoding for the streaming generation use case. This method can be overrided for tokenizer that doesn't follow this API"""
|
2038
|
-
# The prefix text is necessary only to defeat cleanup algorithms in the decode
|
2039
|
-
# which decide to add a space or not depending on the surrounding ids.
|
2040
|
-
prefix_text = self.decode(
|
2041
|
-
all_input_ids[prefix_offset:read_offset], skip_special_tokens=False
|
2042
|
-
)
|
2043
|
-
new_text = self.decode(all_input_ids[prefix_offset:], skip_special_tokens=False)
|
2044
|
-
|
2045
|
-
if len(new_text) > len(prefix_text) and not new_text.endswith("�"):
|
2046
|
-
# utf-8 char at the end means it's a potential unfinished byte sequence
|
2047
|
-
# from byte fallback tokenization.
|
2048
|
-
# If it's in the middle, it's probably a real invalid id generated
|
2049
|
-
# by the model
|
2050
|
-
prefix_index = new_text.index(prefix_text)
|
2051
|
-
new_text = new_text[prefix_index + len(prefix_text) :]
|
2052
|
-
return new_text, read_offset, len(all_input_ids)
|
2053
|
-
else:
|
2054
|
-
return "", prefix_offset, read_offset
|
2055
|
-
|
2056
2064
|
|
2057
2065
|
def _is_control(char):
|
2058
2066
|
"""Checks whether `chars` is a control character."""
|