autogluon.multimodal 1.4.1b20251119__tar.gz → 1.5.1b20260112__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {autogluon_multimodal-1.4.1b20251119/src/autogluon.multimodal.egg-info → autogluon_multimodal-1.5.1b20260112}/PKG-INFO +20 -17
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/README.md +6 -3
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/setup.py +8 -5
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/__init__.py +2 -2
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/dataset_mmlab/multi_image_mix_dataset.py +1 -1
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/infer_types.py +85 -29
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/preprocess_dataframe.py +36 -36
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/template_engine.py +3 -3
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/trivial_augmenter.py +2 -3
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/base.py +19 -20
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/ensemble.py +7 -7
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/few_shot_svm.py +7 -9
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/matching.py +9 -9
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/object_detection.py +1 -1
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/semantic_segmentation.py +6 -6
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/clip.py +0 -4
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/custom_transformer.py +12 -12
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/hf_text.py +1 -1
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/mmdet_image.py +1 -1
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/t_few.py +6 -6
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/utils.py +5 -5
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lit_matcher.py +1 -1
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lit_mmdet.py +1 -1
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/focal_loss.py +22 -11
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/softmax_losses.py +3 -3
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/metrics/__init__.py +1 -1
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/metrics/ranking_metrics.py +3 -3
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/metrics/utils.py +3 -3
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/utils.py +6 -6
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/__init__.py +6 -6
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/cache.py +6 -6
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/checkpoint.py +3 -1
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/config.py +16 -16
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/download.py +1 -1
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/export.py +1 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/hpo.py +2 -2
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/matcher.py +9 -9
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/mmcv.py +1 -1
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/object_detection.py +3 -3
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/registry.py +1 -1
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/save.py +1 -2
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/visualizer.py +4 -4
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/version.py +1 -1
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112/src/autogluon.multimodal.egg-info}/PKG-INFO +20 -17
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon.multimodal.egg-info/requires.txt +11 -11
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/LICENSE +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/NOTICE +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/setup.cfg +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/cli/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/cli/prepare_detection_dataset.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/cli/voc2coco.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/data/default.yaml +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/distiller/default.yaml +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/env/default.yaml +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/matcher/default.yaml +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/model/default.yaml +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/optim/default.yaml +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/coco_detection.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/default_runtime.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/dino/dino-4scale_r50_8xb2-12e_coco.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/dino/dino-5scale_swin-l_8xb2-12e_coco.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/dino/dino-5scale_swin-l_8xb2-36e_coco.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/dino/dino_swinl_tta.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/dino/dino_tta.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/faster_rcnn/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/faster_rcnn/faster_rcnn_r50_fpn.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/schedule_1x.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/voc/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/voc/faster_rcnn_r50_fpn_1x_voc0712.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/voc/voc0712.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_l_8xb8-300e_coco.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_m_8xb8-300e_coco.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_nano_8xb8-300e_coco.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_s_8xb8-300e_coco.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_tiny_8xb8-300e_coco.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_tta.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_x_8xb8-300e_coco.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/constants.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/collator.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/datamodule.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/dataset.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/dataset_mmlab/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/label_encoder.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/mixup.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/nlpaug.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_categorical.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_document.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_image.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_label.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_mmlab/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_mmlab/process_mmdet.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_mmlab/process_mmlab_base.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_mmlab/process_mmocr.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_ner.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_numerical.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_semantic_seg_img.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_text.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/randaug.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/templates.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/utils.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/ner.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/adaptation_layers.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/augmenter.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/categorical_mlp.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/custom_hf_models/modeling_sam_for_conv_lora.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/document_transformer.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/ft_transformer.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/fusion/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/fusion/base.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/fusion/fusion_mlp.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/fusion/fusion_ner.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/fusion/fusion_transformer.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/meta_transformer.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/mlp.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/mmocr_text_detection.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/mmocr_text_recognition.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/ner_text.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/numerical_mlp.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/sam.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/timm_image.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/deepspeed.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lit_distiller.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lit_module.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lit_ner.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lit_semantic_seg.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/__init__.py +2 -2
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/bce_loss.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/lemda_loss.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/rkd_loss.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/structure_loss.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/utils.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lr/__init__.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lr/lr_schedulers.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lr/utils.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/metrics/coverage_metrics.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/metrics/hit_rate_metrics.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/metrics/semantic_seg_metrics.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/predictor.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/colormap.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/device.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/distillation.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/env.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/inference.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/install.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/label_studio.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/load.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/log.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/misc.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/onnx.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/path.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/precision.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/presets.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/problem_types.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/strategy.py +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon.multimodal.egg-info/SOURCES.txt +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon.multimodal.egg-info/dependency_links.txt +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon.multimodal.egg-info/namespace_packages.txt +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon.multimodal.egg-info/top_level.txt +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon.multimodal.egg-info/zip-safe +0 -0
- {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/tests/test_check_style.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: autogluon.multimodal
|
|
3
|
-
Version: 1.4.1b20251119
|
|
3
|
+
Version: 1.5.1b20260112
|
|
4
4
|
Summary: Fast and Accurate ML in 3 Lines of Code
|
|
5
5
|
Home-page: https://github.com/autogluon/autogluon
|
|
6
6
|
Author: AutoGluon Community
|
|
@@ -23,15 +23,15 @@ Classifier: Operating System :: Microsoft :: Windows
|
|
|
23
23
|
Classifier: Operating System :: POSIX
|
|
24
24
|
Classifier: Operating System :: Unix
|
|
25
25
|
Classifier: Programming Language :: Python :: 3
|
|
26
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
27
26
|
Classifier: Programming Language :: Python :: 3.10
|
|
28
27
|
Classifier: Programming Language :: Python :: 3.11
|
|
29
28
|
Classifier: Programming Language :: Python :: 3.12
|
|
29
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
30
30
|
Classifier: Topic :: Software Development
|
|
31
31
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
32
32
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
33
33
|
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
34
|
-
Requires-Python: >=3.
|
|
34
|
+
Requires-Python: >=3.10, <3.14
|
|
35
35
|
Description-Content-Type: text/markdown
|
|
36
36
|
License-File: LICENSE
|
|
37
37
|
License-File: NOTICE
|
|
@@ -42,9 +42,9 @@ Requires-Dist: scikit-learn<1.8.0,>=1.4.0
|
|
|
42
42
|
Requires-Dist: Pillow<12,>=10.0.1
|
|
43
43
|
Requires-Dist: tqdm<5,>=4.38
|
|
44
44
|
Requires-Dist: boto3<2,>=1.10
|
|
45
|
-
Requires-Dist: torch<2.
|
|
46
|
-
Requires-Dist: lightning<2.
|
|
47
|
-
Requires-Dist: transformers[sentencepiece]<4.
|
|
45
|
+
Requires-Dist: torch<2.10,>=2.6
|
|
46
|
+
Requires-Dist: lightning<2.6,>=2.5.1
|
|
47
|
+
Requires-Dist: transformers[sentencepiece]<4.58,>=4.51.0
|
|
48
48
|
Requires-Dist: accelerate<2.0,>=0.34.0
|
|
49
49
|
Requires-Dist: fsspec[http]<=2025.3
|
|
50
50
|
Requires-Dist: requests<3,>=2.30
|
|
@@ -52,14 +52,14 @@ Requires-Dist: jsonschema<4.24,>=4.18
|
|
|
52
52
|
Requires-Dist: seqeval<1.3.0,>=1.2.2
|
|
53
53
|
Requires-Dist: evaluate<0.5.0,>=0.4.0
|
|
54
54
|
Requires-Dist: timm<1.0.7,>=0.9.5
|
|
55
|
-
Requires-Dist: torchvision<0.
|
|
55
|
+
Requires-Dist: torchvision<0.25.0,>=0.21.0
|
|
56
56
|
Requires-Dist: scikit-image<0.26.0,>=0.19.1
|
|
57
57
|
Requires-Dist: text-unidecode<1.4,>=1.3
|
|
58
58
|
Requires-Dist: torchmetrics<1.8,>=1.2.0
|
|
59
59
|
Requires-Dist: omegaconf<2.4.0,>=2.1.1
|
|
60
|
-
Requires-Dist: autogluon.core[raytune]==1.4.1b20251119
|
|
61
|
-
Requires-Dist: autogluon.features==1.4.1b20251119
|
|
62
|
-
Requires-Dist: autogluon.common==1.4.1b20251119
|
|
60
|
+
Requires-Dist: autogluon.core[raytune]==1.5.1b20260112
|
|
61
|
+
Requires-Dist: autogluon.features==1.5.1b20260112
|
|
62
|
+
Requires-Dist: autogluon.common==1.5.1b20260112
|
|
63
63
|
Requires-Dist: pytorch-metric-learning<2.9,>=1.3.0
|
|
64
64
|
Requires-Dist: nlpaug<1.2.0,>=1.1.10
|
|
65
65
|
Requires-Dist: nltk<3.10,>=3.4.5
|
|
@@ -73,11 +73,11 @@ Requires-Dist: pdf2image<1.19,>=1.17.0
|
|
|
73
73
|
Provides-Extra: tests
|
|
74
74
|
Requires-Dist: ruff; extra == "tests"
|
|
75
75
|
Requires-Dist: datasets<3.6.0,>=2.16.0; extra == "tests"
|
|
76
|
-
Requires-Dist: onnx<1.16.2,>=1.13.0; platform_system == "Windows" and extra == "tests"
|
|
77
|
-
Requires-Dist: onnx<1.18.0,>=1.13.0; platform_system != "Windows" and extra == "tests"
|
|
78
|
-
Requires-Dist: onnxruntime<1.22.0,>=1.17.0; extra == "tests"
|
|
79
|
-
Requires-Dist: onnxruntime-gpu<1.22.0,>=1.17.0; (platform_system != "Darwin" and platform_machine != "aarch64") and extra == "tests"
|
|
80
76
|
Requires-Dist: tensorrt<10.9.1,>=8.6.0; (platform_system == "Linux" and python_version < "3.11") and extra == "tests"
|
|
77
|
+
Requires-Dist: onnx!=1.16.2,<1.21.0,>=1.13.0; platform_system == "Windows" and extra == "tests"
|
|
78
|
+
Requires-Dist: onnx<1.21.0,>=1.13.0; platform_system != "Windows" and extra == "tests"
|
|
79
|
+
Requires-Dist: onnxruntime<1.24.0,>=1.17.0; extra == "tests"
|
|
80
|
+
Requires-Dist: onnxruntime-gpu<1.24.0,>=1.17.0; (platform_system != "Darwin" and platform_machine != "aarch64") and extra == "tests"
|
|
81
81
|
Dynamic: author
|
|
82
82
|
Dynamic: classifier
|
|
83
83
|
Dynamic: description
|
|
@@ -100,7 +100,7 @@ Dynamic: summary
|
|
|
100
100
|
|
|
101
101
|
[](https://github.com/autogluon/autogluon/releases)
|
|
102
102
|
[](https://anaconda.org/conda-forge/autogluon)
|
|
103
|
-
[](https://pypi.org/project/autogluon/)
|
|
104
104
|
[](https://pepy.tech/project/autogluon)
|
|
105
105
|
[](./LICENSE)
|
|
106
106
|
[](https://discord.gg/wjUmjqAc2N)
|
|
@@ -117,7 +117,7 @@ AutoGluon, developed by AWS AI, automates machine learning tasks enabling you to
|
|
|
117
117
|
|
|
118
118
|
## 💾 Installation
|
|
119
119
|
|
|
120
|
-
AutoGluon is supported on Python 3.
|
|
120
|
+
AutoGluon is supported on Python 3.10 - 3.13 and is available on Linux, MacOS, and Windows.
|
|
121
121
|
|
|
122
122
|
You can install AutoGluon with:
|
|
123
123
|
|
|
@@ -164,7 +164,10 @@ Below is a curated list of recent tutorials and talks on AutoGluon. A comprehens
|
|
|
164
164
|
- [Benchmarking Multimodal AutoML for Tabular Data with Text Fields](https://datasets-benchmarks-proceedings.neurips.cc/paper/2021/file/9bf31c7ff062936a96d3c8bd1f8f2ff3-Paper-round2.pdf) (*NeurIPS*, 2021) ([BibTeX](CITING.md#autogluonmultimodal))
|
|
165
165
|
- [XTab: Cross-table Pretraining for Tabular Transformers](https://proceedings.mlr.press/v202/zhu23k/zhu23k.pdf) (*ICML*, 2023)
|
|
166
166
|
- [AutoGluon-TimeSeries: AutoML for Probabilistic Time Series Forecasting](https://arxiv.org/abs/2308.05566) (*AutoML Conf*, 2023) ([BibTeX](CITING.md#autogluontimeseries))
|
|
167
|
-
- [TabRepo: A Large Scale Repository of Tabular Model Evaluations and its AutoML Applications](https://arxiv.org/pdf/2311.02971.pdf) (*
|
|
167
|
+
- [TabRepo: A Large Scale Repository of Tabular Model Evaluations and its AutoML Applications](https://arxiv.org/pdf/2311.02971.pdf) (*AutoML Conf*, 2024)
|
|
168
|
+
- [AutoGluon-Multimodal (AutoMM): Supercharging Multimodal AutoML with Foundation Models](https://arxiv.org/pdf/2404.16233) (*AutoML Conf*, 2024) ([BibTeX](CITING.md#autogluonmultimodal))
|
|
169
|
+
- [Multi-layer Stack Ensembles for Time Series Forecasting](https://arxiv.org/abs/2511.15350) (*AutoML Conf*, 2025) ([BibTeX](CITING.md#autogluontimeseries))
|
|
170
|
+
- [Chronos-2: From Univariate to Universal Forecasting](https://arxiv.org/abs/2510.15821) (*Arxiv*, 2025) ([BibTeX](CITING.md#autogluontimeseries))
|
|
168
171
|
|
|
169
172
|
### Articles
|
|
170
173
|
- [AutoGluon-TimeSeries: Every Time Series Forecasting Model In One Library](https://towardsdatascience.com/autogluon-timeseries-every-time-series-forecasting-model-in-one-library-29a3bf6879db) (*Towards Data Science*, Jan 2024)
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
[](https://github.com/autogluon/autogluon/releases)
|
|
9
9
|
[](https://anaconda.org/conda-forge/autogluon)
|
|
10
|
-
[](https://pypi.org/project/autogluon/)
|
|
11
11
|
[](https://pepy.tech/project/autogluon)
|
|
12
12
|
[](./LICENSE)
|
|
13
13
|
[](https://discord.gg/wjUmjqAc2N)
|
|
@@ -24,7 +24,7 @@ AutoGluon, developed by AWS AI, automates machine learning tasks enabling you to
|
|
|
24
24
|
|
|
25
25
|
## 💾 Installation
|
|
26
26
|
|
|
27
|
-
AutoGluon is supported on Python 3.
|
|
27
|
+
AutoGluon is supported on Python 3.10 - 3.13 and is available on Linux, MacOS, and Windows.
|
|
28
28
|
|
|
29
29
|
You can install AutoGluon with:
|
|
30
30
|
|
|
@@ -71,7 +71,10 @@ Below is a curated list of recent tutorials and talks on AutoGluon. A comprehens
|
|
|
71
71
|
- [Benchmarking Multimodal AutoML for Tabular Data with Text Fields](https://datasets-benchmarks-proceedings.neurips.cc/paper/2021/file/9bf31c7ff062936a96d3c8bd1f8f2ff3-Paper-round2.pdf) (*NeurIPS*, 2021) ([BibTeX](CITING.md#autogluonmultimodal))
|
|
72
72
|
- [XTab: Cross-table Pretraining for Tabular Transformers](https://proceedings.mlr.press/v202/zhu23k/zhu23k.pdf) (*ICML*, 2023)
|
|
73
73
|
- [AutoGluon-TimeSeries: AutoML for Probabilistic Time Series Forecasting](https://arxiv.org/abs/2308.05566) (*AutoML Conf*, 2023) ([BibTeX](CITING.md#autogluontimeseries))
|
|
74
|
-
- [TabRepo: A Large Scale Repository of Tabular Model Evaluations and its AutoML Applications](https://arxiv.org/pdf/2311.02971.pdf) (*
|
|
74
|
+
- [TabRepo: A Large Scale Repository of Tabular Model Evaluations and its AutoML Applications](https://arxiv.org/pdf/2311.02971.pdf) (*AutoML Conf*, 2024)
|
|
75
|
+
- [AutoGluon-Multimodal (AutoMM): Supercharging Multimodal AutoML with Foundation Models](https://arxiv.org/pdf/2404.16233) (*AutoML Conf*, 2024) ([BibTeX](CITING.md#autogluonmultimodal))
|
|
76
|
+
- [Multi-layer Stack Ensembles for Time Series Forecasting](https://arxiv.org/abs/2511.15350) (*AutoML Conf*, 2025) ([BibTeX](CITING.md#autogluontimeseries))
|
|
77
|
+
- [Chronos-2: From Univariate to Universal Forecasting](https://arxiv.org/abs/2510.15821) (*Arxiv*, 2025) ([BibTeX](CITING.md#autogluontimeseries))
|
|
75
78
|
|
|
76
79
|
### Articles
|
|
77
80
|
- [AutoGluon-TimeSeries: Every Time Series Forecasting Model In One Library](https://towardsdatascience.com/autogluon-timeseries-every-time-series-forecasting-model-in-one-library-29a3bf6879db) (*Towards Data Science*, Jan 2024)
|
|
@@ -41,7 +41,7 @@ install_requires = [
|
|
|
41
41
|
"seqeval>=1.2.2,<1.3.0",
|
|
42
42
|
"evaluate>=0.4.0,<0.5.0",
|
|
43
43
|
"timm>=0.9.5,<1.0.7",
|
|
44
|
-
"torchvision>=0.21.0,<0.
|
|
44
|
+
"torchvision>=0.21.0,<0.25.0",
|
|
45
45
|
"scikit-image>=0.19.1,<0.26.0",
|
|
46
46
|
"text-unidecode>=1.3,<1.4",
|
|
47
47
|
"torchmetrics>=1.2.0,<1.8",
|
|
@@ -66,11 +66,14 @@ install_requires = ag.get_dependency_version_ranges(install_requires)
|
|
|
66
66
|
tests_require = [
|
|
67
67
|
"ruff",
|
|
68
68
|
"datasets>=2.16.0,<3.6.0",
|
|
69
|
-
"onnx>=1.13.0,<1.16.2;platform_system=='Windows'", # cap at 1.16.1 for issue https://github.com/onnx/onnx/issues/6267
|
|
70
|
-
"onnx>=1.13.0,<1.18.0;platform_system!='Windows'",
|
|
71
|
-
"onnxruntime>=1.17.0,<1.22.0", # install for gpu system due to https://github.com/autogluon/autogluon/issues/3804
|
|
72
|
-
"onnxruntime-gpu>=1.17.0,<1.22.0;platform_system!='Darwin' and platform_machine!='aarch64'",
|
|
73
69
|
"tensorrt>=8.6.0,<10.9.1;platform_system=='Linux' and python_version<'3.11'",
|
|
70
|
+
# Sync ONNX requirements with tabular/setup.py
|
|
71
|
+
"onnx>=1.13.0,!=1.16.2,<1.21.0;platform_system=='Windows'", # exclude 1.16.2 for issue https://github.com/onnx/onnx/issues/6267
|
|
72
|
+
"onnx>=1.13.0,<1.21.0;platform_system!='Windows'",
|
|
73
|
+
# For macOS, there isn't an onnxruntime-gpu package installed with skl2onnx.
|
|
74
|
+
# Therefore, we install onnxruntime explicitly here just for macOS.
|
|
75
|
+
"onnxruntime>=1.17.0,<1.24.0",
|
|
76
|
+
"onnxruntime-gpu>=1.17.0,<1.24.0; platform_system != 'Darwin' and platform_machine != 'aarch64'",
|
|
74
77
|
]
|
|
75
78
|
|
|
76
79
|
extras_require = {"tests": tests_require}
|
|
@@ -3,14 +3,14 @@ from .dataset import BaseDataset
|
|
|
3
3
|
from .dataset_mmlab import MultiImageMixDataset
|
|
4
4
|
from .infer_types import (
|
|
5
5
|
infer_column_types,
|
|
6
|
+
infer_ner_column_type,
|
|
6
7
|
infer_output_shape,
|
|
7
8
|
infer_problem_type,
|
|
8
9
|
infer_rois_column_type,
|
|
9
10
|
is_image_column,
|
|
10
11
|
)
|
|
11
|
-
from .mixup import MixupModule
|
|
12
|
-
from .infer_types import infer_column_types, infer_output_shape, infer_problem_type, is_image_column, infer_ner_column_type
|
|
13
12
|
from .label_encoder import CustomLabelEncoder, NerLabelEncoder
|
|
13
|
+
from .mixup import MixupModule
|
|
14
14
|
from .preprocess_dataframe import MultiModalFeaturePreprocessor
|
|
15
15
|
from .process_categorical import CategoricalProcessor
|
|
16
16
|
from .process_document import DocumentProcessor
|
|
@@ -290,7 +290,7 @@ class Mosaic(BaseTransform):
|
|
|
290
290
|
prob: float = 1.0,
|
|
291
291
|
) -> None:
|
|
292
292
|
assert isinstance(img_scale, tuple)
|
|
293
|
-
assert 0 <= prob <= 1.0, "The probability should be in range [0,1].
|
|
293
|
+
assert 0 <= prob <= 1.0, f"The probability should be in range [0,1]. got {prob}."
|
|
294
294
|
|
|
295
295
|
log_img_scale(img_scale, skip_square=True, shape_order="wh")
|
|
296
296
|
self.img_scale = img_scale
|
|
@@ -304,11 +304,24 @@ def is_document_image_column(
|
|
|
304
304
|
col_name: str,
|
|
305
305
|
image_type: Optional[str] = IMAGE_PATH,
|
|
306
306
|
sample_m: Optional[int] = 10,
|
|
307
|
-
|
|
307
|
+
min_text_len_threshold: Optional[int] = 200,
|
|
308
|
+
text_density_threshold: Optional[float] = 0.001,
|
|
309
|
+
min_line_count: Optional[int] = 3,
|
|
310
|
+
min_document_ratio: Optional[float] = 0.8,
|
|
308
311
|
) -> bool:
|
|
309
312
|
"""
|
|
310
313
|
Identify if a column is a document image column.
|
|
311
314
|
|
|
315
|
+
Document images are images that primarily contain text (e.g., scanned documents,
|
|
316
|
+
screenshots of text, PDFs rendered as images). Regular photographs, maps,
|
|
317
|
+
charts with labels, or images with watermarks/captions should NOT be
|
|
318
|
+
classified as document images.
|
|
319
|
+
|
|
320
|
+
The detection uses multiple heuristics:
|
|
321
|
+
1. Minimum absolute text length (short text like watermarks is ignored)
|
|
322
|
+
2. Text density relative to image size (documents have high text-to-pixel ratio)
|
|
323
|
+
3. Line count (documents typically have multiple lines of text)
|
|
324
|
+
|
|
312
325
|
Parameters
|
|
313
326
|
----------
|
|
314
327
|
data
|
|
@@ -319,46 +332,90 @@ def is_document_image_column(
|
|
|
319
332
|
The image type to check. Set to IMAGE_PATH by default.
|
|
320
333
|
sample_m
|
|
321
334
|
Number of sample images used to check if images are document images.
|
|
322
|
-
|
|
323
|
-
|
|
335
|
+
min_text_len_threshold
|
|
336
|
+
Minimum text length to even consider an image as a potential document.
|
|
337
|
+
This filters out images with just watermarks or short captions.
|
|
338
|
+
text_density_threshold
|
|
339
|
+
Minimum ratio of (text_characters / image_pixels) to consider as document.
|
|
340
|
+
Documents typically have much higher text density than photos with labels.
|
|
341
|
+
min_line_count
|
|
342
|
+
Minimum number of non-empty text lines expected in a document.
|
|
343
|
+
min_document_ratio
|
|
344
|
+
Minimum ratio of images that must be classified as documents for the
|
|
345
|
+
entire column to be treated as a document column.
|
|
346
|
+
|
|
324
347
|
Returns
|
|
325
348
|
-------
|
|
326
349
|
Whether the column is a document image column.
|
|
327
350
|
"""
|
|
351
|
+
if data.empty:
|
|
352
|
+
return False
|
|
328
353
|
|
|
329
|
-
# TODO: Add support for other types (e.g., pdf) of document.
|
|
330
|
-
|
|
331
|
-
words_len = []
|
|
332
354
|
if len(data) > sample_m:
|
|
333
|
-
# Sample to speed-up type inference
|
|
334
355
|
data = data.sample(n=sample_m, random_state=0)
|
|
335
|
-
|
|
356
|
+
|
|
357
|
+
document_count = 0
|
|
358
|
+
total_processed = 0
|
|
359
|
+
|
|
336
360
|
for images in data:
|
|
337
|
-
|
|
361
|
+
if images is None:
|
|
362
|
+
continue
|
|
363
|
+
|
|
338
364
|
if not isinstance(images, list):
|
|
339
365
|
images = [images]
|
|
366
|
+
|
|
340
367
|
for per_image in images:
|
|
368
|
+
if not isinstance(per_image, str):
|
|
369
|
+
total_processed += 1
|
|
370
|
+
continue
|
|
371
|
+
|
|
341
372
|
try:
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
373
|
+
with PIL.Image.open(per_image) as img:
|
|
374
|
+
width, height = img.size
|
|
375
|
+
total_pixels = width * height
|
|
376
|
+
|
|
377
|
+
ocr_text = pytesseract.image_to_string(img)
|
|
378
|
+
text_length = len(ocr_text.strip())
|
|
379
|
+
|
|
380
|
+
total_processed += 1
|
|
381
|
+
|
|
382
|
+
# Heuristic 1: Minimum absolute text length
|
|
383
|
+
# Filters out watermarks, copyright notices, short captions
|
|
384
|
+
if text_length < min_text_len_threshold:
|
|
385
|
+
continue
|
|
386
|
+
|
|
387
|
+
# Heuristic 2: Text density (characters per pixel)
|
|
388
|
+
# Documents have dense text; photos with small labels don't
|
|
389
|
+
text_density = text_length / total_pixels
|
|
390
|
+
if text_density < text_density_threshold:
|
|
391
|
+
continue
|
|
392
|
+
|
|
393
|
+
# Heuristic 3: Line count
|
|
394
|
+
# Documents have multiple lines; watermarks are 1-2 lines
|
|
395
|
+
lines = [line for line in ocr_text.split("\n") if line.strip()]
|
|
396
|
+
if len(lines) < min_line_count:
|
|
397
|
+
continue
|
|
398
|
+
|
|
399
|
+
# Passed all heuristics - this looks like a document
|
|
400
|
+
document_count += 1
|
|
401
|
+
|
|
346
402
|
except Exception as e:
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
break
|
|
350
|
-
success = True
|
|
351
|
-
if not success:
|
|
352
|
-
failure_count += 1
|
|
403
|
+
logger.debug(f"Failed to process image {per_image}: {e}")
|
|
404
|
+
total_processed += 1
|
|
353
405
|
|
|
354
|
-
if
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
406
|
+
if total_processed == 0:
|
|
407
|
+
return False
|
|
408
|
+
|
|
409
|
+
document_ratio = document_count / total_processed
|
|
410
|
+
is_document_column = document_ratio >= min_document_ratio
|
|
411
|
+
|
|
412
|
+
logger.debug(
|
|
413
|
+
f"Column '{col_name}': {document_count}/{total_processed} images "
|
|
414
|
+
f"({document_ratio:.1%}) classified as documents. "
|
|
415
|
+
f"Column type: {'document' if is_document_column else 'regular'} images."
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
return is_document_column
|
|
362
419
|
|
|
363
420
|
|
|
364
421
|
def is_text_column(data: pd.Series) -> bool:
|
|
@@ -769,8 +826,7 @@ def infer_output_shape(
|
|
|
769
826
|
if problem_type in [BINARY, MULTICLASS, REGRESSION, CLASSIFICATION]:
|
|
770
827
|
class_num = len(data[label_column].unique())
|
|
771
828
|
err_msg = (
|
|
772
|
-
f"Problem type is '{problem_type}' while the number of "
|
|
773
|
-
f"unique values in the label column is {class_num}."
|
|
829
|
+
f"Problem type is '{problem_type}' while the number of unique values in the label column is {class_num}."
|
|
774
830
|
)
|
|
775
831
|
if problem_type == BINARY:
|
|
776
832
|
if class_num != 2:
|
|
@@ -456,9 +456,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
|
|
|
456
456
|
text_types
|
|
457
457
|
The column types of these text data, e.g., text or text_identifier.
|
|
458
458
|
"""
|
|
459
|
-
assert (
|
|
460
|
-
|
|
461
|
-
)
|
|
459
|
+
assert self._fit_called or self._fit_x_called, (
|
|
460
|
+
"You will need to first call preprocessor.fit_x() before calling preprocessor.transform_text."
|
|
461
|
+
)
|
|
462
462
|
text_features = {}
|
|
463
463
|
text_types = {}
|
|
464
464
|
for col_name in self._text_feature_names:
|
|
@@ -508,9 +508,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
|
|
|
508
508
|
image_types
|
|
509
509
|
The column types of these image data, e.g., image_path or image_identifier.
|
|
510
510
|
"""
|
|
511
|
-
assert (
|
|
512
|
-
|
|
513
|
-
)
|
|
511
|
+
assert self._fit_called or self._fit_x_called, (
|
|
512
|
+
"You will need to first call preprocessor.fit_x() before calling preprocessor.transform_rois."
|
|
513
|
+
)
|
|
514
514
|
|
|
515
515
|
x = self.transform_image(df)
|
|
516
516
|
ret_data = x[0]
|
|
@@ -552,9 +552,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
|
|
|
552
552
|
image_types
|
|
553
553
|
The column types of these image data, e.g., image_path or image_identifier.
|
|
554
554
|
"""
|
|
555
|
-
assert (
|
|
556
|
-
|
|
557
|
-
)
|
|
555
|
+
assert self._fit_called or self._fit_x_called, (
|
|
556
|
+
"You will need to first call preprocessor.fit_x() before calling preprocessor.transform_semantic_segmentation_img."
|
|
557
|
+
)
|
|
558
558
|
|
|
559
559
|
ret_data = {}
|
|
560
560
|
ret_type = {}
|
|
@@ -597,9 +597,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
|
|
|
597
597
|
All the image data stored in a dictionary.
|
|
598
598
|
image_types
|
|
599
599
|
The column types of these image data, e.g., image_path, image_bytearray or image_identifier."""
|
|
600
|
-
assert (
|
|
601
|
-
|
|
602
|
-
)
|
|
600
|
+
assert self._fit_called or self._fit_x_called, (
|
|
601
|
+
"You will need to first call preprocessor.fit_x() before calling preprocessor.transform_image."
|
|
602
|
+
)
|
|
603
603
|
|
|
604
604
|
image_features = {}
|
|
605
605
|
image_types = {}
|
|
@@ -650,9 +650,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
|
|
|
650
650
|
document_types
|
|
651
651
|
The column types of these document data.
|
|
652
652
|
"""
|
|
653
|
-
assert (
|
|
654
|
-
|
|
655
|
-
)
|
|
653
|
+
assert self._fit_called or self._fit_x_called, (
|
|
654
|
+
"You will need to first call preprocessor.fit_x() before calling preprocessor.transform_document."
|
|
655
|
+
)
|
|
656
656
|
document_features = {}
|
|
657
657
|
document_types = {}
|
|
658
658
|
for col_name in self._document_feature_names:
|
|
@@ -687,9 +687,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
|
|
|
687
687
|
None
|
|
688
688
|
The column types of numerical data, which is None currently since only one numerical type exists.
|
|
689
689
|
"""
|
|
690
|
-
assert (
|
|
691
|
-
|
|
692
|
-
)
|
|
690
|
+
assert self._fit_called or self._fit_x_called, (
|
|
691
|
+
"You will need to first call preprocessor.fit before calling preprocessor.transform_numerical."
|
|
692
|
+
)
|
|
693
693
|
numerical_features = {}
|
|
694
694
|
for col_name in self._numerical_feature_names:
|
|
695
695
|
generator = self._feature_generators[col_name]
|
|
@@ -720,9 +720,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
|
|
|
720
720
|
None
|
|
721
721
|
The column types of categorical data, which is None currently since only one categorical type exists.
|
|
722
722
|
"""
|
|
723
|
-
assert (
|
|
724
|
-
|
|
725
|
-
)
|
|
723
|
+
assert self._fit_called or self._fit_x_called, (
|
|
724
|
+
"You will need to first call preprocessor.fit before calling preprocessor.transform_categorical."
|
|
725
|
+
)
|
|
726
726
|
categorical_features = {}
|
|
727
727
|
for col_name, num_category in self._categorical_num_categories.items():
|
|
728
728
|
col_value = df[col_name]
|
|
@@ -758,9 +758,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
|
|
|
758
758
|
label_types
|
|
759
759
|
The label column types.
|
|
760
760
|
"""
|
|
761
|
-
assert (
|
|
762
|
-
|
|
763
|
-
)
|
|
761
|
+
assert self._fit_called or self._fit_y_called, (
|
|
762
|
+
"You will need to first call preprocessor.fit_y() before calling preprocessor.transform_label."
|
|
763
|
+
)
|
|
764
764
|
# Creating deep copy of the DataFrame, which allows writable buffer to be created for the new df
|
|
765
765
|
# This is needed for 1.4.1 < scikit-learn < 1.5.0, versions <=1.4.0 and >=1.5.1 do not need a writable buffer
|
|
766
766
|
df = df.copy(deep=True)
|
|
@@ -784,9 +784,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
|
|
|
784
784
|
self,
|
|
785
785
|
df: pd.DataFrame,
|
|
786
786
|
) -> Tuple[Dict[str, NDArray], Dict[str, str]]:
|
|
787
|
-
assert (
|
|
788
|
-
|
|
789
|
-
)
|
|
787
|
+
assert self._fit_called or self._fit_x_called, (
|
|
788
|
+
"You will need to first call preprocessor.fit_x() before calling preprocessor.transform_ner."
|
|
789
|
+
)
|
|
790
790
|
ret_data, ret_type = {}, {}
|
|
791
791
|
ner_text_features = {}
|
|
792
792
|
ner_text_types = {}
|
|
@@ -831,12 +831,12 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
|
|
|
831
831
|
-------
|
|
832
832
|
Ground-truth labels ready to compute metric scores.
|
|
833
833
|
"""
|
|
834
|
-
assert (
|
|
835
|
-
|
|
836
|
-
)
|
|
837
|
-
assert (
|
|
838
|
-
self._label_column in
|
|
839
|
-
)
|
|
834
|
+
assert self._fit_called or self._fit_y_called, (
|
|
835
|
+
"You will need to first call preprocessor.fit_y() before calling preprocessor.transform_label_for_metric."
|
|
836
|
+
)
|
|
837
|
+
assert self._label_column in df.columns, (
|
|
838
|
+
f"Label {self._label_column} is not in the data. Cannot perform evaluation without ground truth labels."
|
|
839
|
+
)
|
|
840
840
|
y_df = df[self._label_column]
|
|
841
841
|
if self.label_type == CATEGORICAL:
|
|
842
842
|
# need to encode to integer labels
|
|
@@ -875,9 +875,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
|
|
|
875
875
|
-------
|
|
876
876
|
Predicted labels ready to compute metric scores.
|
|
877
877
|
"""
|
|
878
|
-
assert (
|
|
879
|
-
|
|
880
|
-
)
|
|
878
|
+
assert self._fit_called or self._fit_y_called, (
|
|
879
|
+
"You will need to first call preprocessor.fit_y() before calling preprocessor.transform_prediction."
|
|
880
|
+
)
|
|
881
881
|
|
|
882
882
|
if self.label_type == CATEGORICAL:
|
|
883
883
|
assert len(y_pred.shape) <= 2
|
|
@@ -32,9 +32,9 @@ class TemplateEngine:
|
|
|
32
32
|
self.template_length = self.template_config.template_length
|
|
33
33
|
|
|
34
34
|
if self.preset_templates:
|
|
35
|
-
assert (
|
|
36
|
-
|
|
37
|
-
)
|
|
35
|
+
assert len(self.preset_templates) == 2, (
|
|
36
|
+
f"Preset templates has the wrong format. Needs to be [DATASET, SUBSET]."
|
|
37
|
+
)
|
|
38
38
|
dataset_templates = DatasetTemplates(self.preset_templates[0], self.preset_templates[1])
|
|
39
39
|
current_templates = list(dataset_templates.templates.values())
|
|
40
40
|
self.templates += current_templates[: self.num_templates]
|
|
@@ -210,8 +210,8 @@ def set_image_augmentation_space():
|
|
|
210
210
|
def download_nltk():
|
|
211
211
|
"""
|
|
212
212
|
Download required NLTK resources with singleton pattern to prevent multiple downloads.
|
|
213
|
-
|
|
214
|
-
This function handles NLTK 3.9+ changes where resource names changed and
|
|
213
|
+
|
|
214
|
+
This function handles NLTK 3.9+ changes where resource names changed and
|
|
215
215
|
the quiet=True parameter behavior was affected. Uses a global flag to ensure
|
|
216
216
|
downloads happen only once even when TrivialAugment is instantiated multiple times.
|
|
217
217
|
"""
|
|
@@ -232,7 +232,6 @@ def download_nltk():
|
|
|
232
232
|
try:
|
|
233
233
|
nltk.data.find(resource_path)
|
|
234
234
|
except LookupError:
|
|
235
|
-
|
|
236
235
|
nltk.download(download_name, quiet=True)
|
|
237
236
|
_nltk_downloaded = True
|
|
238
237
|
|
|
@@ -530,9 +530,9 @@ class BaseLearner(ExportMixin, DistillationMixin, RealtimeMixin):
|
|
|
530
530
|
def fit_sanity_check(self):
|
|
531
531
|
assert not self._resume or not self._is_hpo, "You can not resume training with HPO."
|
|
532
532
|
if self._is_hpo and hasattr(self, "_teacher_learner") and self._teacher_learner is not None:
|
|
533
|
-
assert isinstance(
|
|
534
|
-
|
|
535
|
-
)
|
|
533
|
+
assert isinstance(self._teacher_learner, str), (
|
|
534
|
+
"HPO with distillation only supports passing a path to the learner."
|
|
535
|
+
)
|
|
536
536
|
|
|
537
537
|
def prepare_fit_args(
|
|
538
538
|
self,
|
|
@@ -683,9 +683,9 @@ class BaseLearner(ExportMixin, DistillationMixin, RealtimeMixin):
|
|
|
683
683
|
overrides=hyperparameters,
|
|
684
684
|
)
|
|
685
685
|
if self._model is None:
|
|
686
|
-
assert (
|
|
687
|
-
|
|
688
|
-
)
|
|
686
|
+
assert len(self._config.model.names) == 1, (
|
|
687
|
+
f"Zero shot mode only supports using one model, but detects multiple models {self._config.model.names}"
|
|
688
|
+
)
|
|
689
689
|
self._model = create_fusion_model(
|
|
690
690
|
config=self._config,
|
|
691
691
|
pretrained=self._pretrained,
|
|
@@ -836,8 +836,7 @@ class BaseLearner(ExportMixin, DistillationMixin, RealtimeMixin):
|
|
|
836
836
|
)
|
|
837
837
|
if mixup_active and (config.env.per_gpu_batch_size == 1 or config.env.per_gpu_batch_size % 2 == 1):
|
|
838
838
|
warnings.warn(
|
|
839
|
-
"The mixup is done on the batch."
|
|
840
|
-
"The per_gpu_batch_size should be >1 and even for reasonable operation",
|
|
839
|
+
"The mixup is done on the batch.The per_gpu_batch_size should be >1 and even for reasonable operation",
|
|
841
840
|
UserWarning,
|
|
842
841
|
)
|
|
843
842
|
return mixup_active, mixup_func
|
|
@@ -1053,9 +1052,9 @@ class BaseLearner(ExportMixin, DistillationMixin, RealtimeMixin):
|
|
|
1053
1052
|
if (
|
|
1054
1053
|
config.env.strategy == DEEPSPEED_OFFLOADING and num_gpus == 1 and DEEPSPEED_MODULE not in sys.modules
|
|
1055
1054
|
): # Offloading currently only tested for single GPU
|
|
1056
|
-
assert (
|
|
1057
|
-
version
|
|
1058
|
-
)
|
|
1055
|
+
assert version.parse(pl.__version__) >= version.parse(DEEPSPEED_MIN_PL_VERSION), (
|
|
1056
|
+
f"For DeepSpeed Offloading to work reliably you need at least lightning version {DEEPSPEED_MIN_PL_VERSION}, however, found {pl.__version__}. Please update your lightning version."
|
|
1057
|
+
)
|
|
1059
1058
|
from ..optim.deepspeed import CustomDeepSpeedStrategy
|
|
1060
1059
|
|
|
1061
1060
|
strategy = CustomDeepSpeedStrategy(
|
|
@@ -1909,15 +1908,15 @@ class BaseLearner(ExportMixin, DistillationMixin, RealtimeMixin):
|
|
|
1909
1908
|
return_prob: Optional[bool] = False,
|
|
1910
1909
|
):
|
|
1911
1910
|
query_embeddings = self.extract_embedding(query_data, as_tensor=True)
|
|
1912
|
-
assert (
|
|
1913
|
-
|
|
1914
|
-
)
|
|
1911
|
+
assert len(query_embeddings) == 1, (
|
|
1912
|
+
f"Multiple embedding types `{query_embeddings.keys()}` exist in query data. Please reduce them to one type."
|
|
1913
|
+
)
|
|
1915
1914
|
query_embeddings = list(query_embeddings.values())[0]
|
|
1916
1915
|
|
|
1917
1916
|
candidate_embeddings = self.extract_embedding(candidate_data, as_tensor=True)
|
|
1918
|
-
assert (
|
|
1919
|
-
|
|
1920
|
-
)
|
|
1917
|
+
assert len(candidate_embeddings) == 1, (
|
|
1918
|
+
f"Multiple embedding types `{candidate_embeddings.keys()}` exist in candidate data. Please reduce them to one type."
|
|
1919
|
+
)
|
|
1921
1920
|
candidate_embeddings = list(candidate_embeddings.values())[0]
|
|
1922
1921
|
|
|
1923
1922
|
if return_prob:
|
|
@@ -2157,9 +2156,9 @@ class BaseLearner(ExportMixin, DistillationMixin, RealtimeMixin):
|
|
|
2157
2156
|
state_dict = {k: v for k, v in state_dict.items() if k not in buffer_names_to_filter}
|
|
2158
2157
|
|
|
2159
2158
|
load_result = self._model.load_state_dict(state_dict, strict=strict)
|
|
2160
|
-
assert (
|
|
2161
|
-
|
|
2162
|
-
)
|
|
2159
|
+
assert len(load_result.unexpected_keys) == 0, (
|
|
2160
|
+
f"Load model failed, unexpected keys {load_result.unexpected_keys.__str__()}"
|
|
2161
|
+
)
|
|
2163
2162
|
|
|
2164
2163
|
@staticmethod
|
|
2165
2164
|
def _replace_model_name_prefix(
|