autogluon.multimodal 1.4.1b20251119__tar.gz → 1.5.1b20260112__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. {autogluon_multimodal-1.4.1b20251119/src/autogluon.multimodal.egg-info → autogluon_multimodal-1.5.1b20260112}/PKG-INFO +20 -17
  2. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/README.md +6 -3
  3. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/setup.py +8 -5
  4. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/__init__.py +2 -2
  5. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/dataset_mmlab/multi_image_mix_dataset.py +1 -1
  6. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/infer_types.py +85 -29
  7. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/preprocess_dataframe.py +36 -36
  8. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/template_engine.py +3 -3
  9. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/trivial_augmenter.py +2 -3
  10. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/base.py +19 -20
  11. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/ensemble.py +7 -7
  12. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/few_shot_svm.py +7 -9
  13. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/matching.py +9 -9
  14. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/object_detection.py +1 -1
  15. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/semantic_segmentation.py +6 -6
  16. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/clip.py +0 -4
  17. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/custom_transformer.py +12 -12
  18. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/hf_text.py +1 -1
  19. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/mmdet_image.py +1 -1
  20. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/t_few.py +6 -6
  21. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/utils.py +5 -5
  22. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lit_matcher.py +1 -1
  23. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lit_mmdet.py +1 -1
  24. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/focal_loss.py +22 -11
  25. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/softmax_losses.py +3 -3
  26. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/metrics/__init__.py +1 -1
  27. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/metrics/ranking_metrics.py +3 -3
  28. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/metrics/utils.py +3 -3
  29. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/utils.py +6 -6
  30. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/__init__.py +6 -6
  31. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/cache.py +6 -6
  32. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/checkpoint.py +3 -1
  33. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/config.py +16 -16
  34. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/download.py +1 -1
  35. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/export.py +1 -0
  36. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/hpo.py +2 -2
  37. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/matcher.py +9 -9
  38. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/mmcv.py +1 -1
  39. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/object_detection.py +3 -3
  40. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/registry.py +1 -1
  41. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/save.py +1 -2
  42. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/visualizer.py +4 -4
  43. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/version.py +1 -1
  44. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112/src/autogluon.multimodal.egg-info}/PKG-INFO +20 -17
  45. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon.multimodal.egg-info/requires.txt +11 -11
  46. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/LICENSE +0 -0
  47. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/NOTICE +0 -0
  48. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/setup.cfg +0 -0
  49. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/__init__.py +0 -0
  50. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/cli/__init__.py +0 -0
  51. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/cli/prepare_detection_dataset.py +0 -0
  52. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/cli/voc2coco.py +0 -0
  53. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/__init__.py +0 -0
  54. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/data/default.yaml +0 -0
  55. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/distiller/default.yaml +0 -0
  56. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/env/default.yaml +0 -0
  57. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/matcher/default.yaml +0 -0
  58. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/model/default.yaml +0 -0
  59. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/optim/default.yaml +0 -0
  60. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/__init__.py +0 -0
  61. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/__init__.py +0 -0
  62. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/coco_detection.py +0 -0
  63. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/default_runtime.py +0 -0
  64. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/dino/dino-4scale_r50_8xb2-12e_coco.py +0 -0
  65. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/dino/dino-5scale_swin-l_8xb2-12e_coco.py +0 -0
  66. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/dino/dino-5scale_swin-l_8xb2-36e_coco.py +0 -0
  67. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/dino/dino_swinl_tta.py +0 -0
  68. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/dino/dino_tta.py +0 -0
  69. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/faster_rcnn/__init__.py +0 -0
  70. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/faster_rcnn/faster_rcnn_r50_fpn.py +0 -0
  71. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/schedule_1x.py +0 -0
  72. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/voc/__init__.py +0 -0
  73. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/voc/faster_rcnn_r50_fpn_1x_voc0712.py +0 -0
  74. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/voc/voc0712.py +0 -0
  75. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/__init__.py +0 -0
  76. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_l_8xb8-300e_coco.py +0 -0
  77. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_m_8xb8-300e_coco.py +0 -0
  78. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_nano_8xb8-300e_coco.py +0 -0
  79. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_s_8xb8-300e_coco.py +0 -0
  80. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_tiny_8xb8-300e_coco.py +0 -0
  81. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_tta.py +0 -0
  82. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/configs/pretrain/detection/yolox/yolox_x_8xb8-300e_coco.py +0 -0
  83. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/constants.py +0 -0
  84. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/collator.py +0 -0
  85. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/datamodule.py +0 -0
  86. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/dataset.py +0 -0
  87. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/dataset_mmlab/__init__.py +0 -0
  88. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/label_encoder.py +0 -0
  89. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/mixup.py +0 -0
  90. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/nlpaug.py +0 -0
  91. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_categorical.py +0 -0
  92. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_document.py +0 -0
  93. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_image.py +0 -0
  94. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_label.py +0 -0
  95. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_mmlab/__init__.py +0 -0
  96. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_mmlab/process_mmdet.py +0 -0
  97. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_mmlab/process_mmlab_base.py +0 -0
  98. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_mmlab/process_mmocr.py +0 -0
  99. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_ner.py +0 -0
  100. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_numerical.py +0 -0
  101. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_semantic_seg_img.py +0 -0
  102. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/process_text.py +0 -0
  103. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/randaug.py +0 -0
  104. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/templates.py +0 -0
  105. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/data/utils.py +0 -0
  106. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/__init__.py +0 -0
  107. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/learners/ner.py +0 -0
  108. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/__init__.py +0 -0
  109. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/adaptation_layers.py +0 -0
  110. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/augmenter.py +0 -0
  111. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/categorical_mlp.py +0 -0
  112. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/custom_hf_models/modeling_sam_for_conv_lora.py +0 -0
  113. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/document_transformer.py +0 -0
  114. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/ft_transformer.py +0 -0
  115. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/fusion/__init__.py +0 -0
  116. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/fusion/base.py +0 -0
  117. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/fusion/fusion_mlp.py +0 -0
  118. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/fusion/fusion_ner.py +0 -0
  119. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/fusion/fusion_transformer.py +0 -0
  120. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/meta_transformer.py +0 -0
  121. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/mlp.py +0 -0
  122. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/mmocr_text_detection.py +0 -0
  123. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/mmocr_text_recognition.py +0 -0
  124. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/ner_text.py +0 -0
  125. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/numerical_mlp.py +0 -0
  126. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/sam.py +0 -0
  127. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/models/timm_image.py +0 -0
  128. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/__init__.py +0 -0
  129. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/deepspeed.py +0 -0
  130. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lit_distiller.py +0 -0
  131. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lit_module.py +0 -0
  132. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lit_ner.py +0 -0
  133. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lit_semantic_seg.py +0 -0
  134. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/__init__.py +2 -2
  135. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/bce_loss.py +0 -0
  136. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/lemda_loss.py +0 -0
  137. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/rkd_loss.py +0 -0
  138. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/structure_loss.py +0 -0
  139. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/losses/utils.py +0 -0
  140. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lr/__init__.py +0 -0
  141. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lr/lr_schedulers.py +0 -0
  142. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/lr/utils.py +0 -0
  143. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/metrics/coverage_metrics.py +0 -0
  144. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/metrics/hit_rate_metrics.py +0 -0
  145. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/optim/metrics/semantic_seg_metrics.py +0 -0
  146. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/predictor.py +0 -0
  147. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/colormap.py +0 -0
  148. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/device.py +0 -0
  149. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/distillation.py +0 -0
  150. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/env.py +0 -0
  151. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/inference.py +0 -0
  152. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/install.py +0 -0
  153. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/label_studio.py +0 -0
  154. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/load.py +0 -0
  155. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/log.py +0 -0
  156. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/misc.py +0 -0
  157. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/onnx.py +0 -0
  158. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/path.py +0 -0
  159. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/precision.py +0 -0
  160. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/presets.py +0 -0
  161. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/problem_types.py +0 -0
  162. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon/multimodal/utils/strategy.py +0 -0
  163. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon.multimodal.egg-info/SOURCES.txt +0 -0
  164. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon.multimodal.egg-info/dependency_links.txt +0 -0
  165. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon.multimodal.egg-info/namespace_packages.txt +0 -0
  166. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon.multimodal.egg-info/top_level.txt +0 -0
  167. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/src/autogluon.multimodal.egg-info/zip-safe +0 -0
  168. {autogluon_multimodal-1.4.1b20251119 → autogluon_multimodal-1.5.1b20260112}/tests/test_check_style.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: autogluon.multimodal
3
- Version: 1.4.1b20251119
3
+ Version: 1.5.1b20260112
4
4
  Summary: Fast and Accurate ML in 3 Lines of Code
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -23,15 +23,15 @@ Classifier: Operating System :: Microsoft :: Windows
23
23
  Classifier: Operating System :: POSIX
24
24
  Classifier: Operating System :: Unix
25
25
  Classifier: Programming Language :: Python :: 3
26
- Classifier: Programming Language :: Python :: 3.9
27
26
  Classifier: Programming Language :: Python :: 3.10
28
27
  Classifier: Programming Language :: Python :: 3.11
29
28
  Classifier: Programming Language :: Python :: 3.12
29
+ Classifier: Programming Language :: Python :: 3.13
30
30
  Classifier: Topic :: Software Development
31
31
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
32
32
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
33
33
  Classifier: Topic :: Scientific/Engineering :: Image Recognition
34
- Requires-Python: >=3.9, <3.13
34
+ Requires-Python: >=3.10, <3.14
35
35
  Description-Content-Type: text/markdown
36
36
  License-File: LICENSE
37
37
  License-File: NOTICE
@@ -42,9 +42,9 @@ Requires-Dist: scikit-learn<1.8.0,>=1.4.0
42
42
  Requires-Dist: Pillow<12,>=10.0.1
43
43
  Requires-Dist: tqdm<5,>=4.38
44
44
  Requires-Dist: boto3<2,>=1.10
45
- Requires-Dist: torch<2.8,>=2.6
46
- Requires-Dist: lightning<2.8,>=2.5.1
47
- Requires-Dist: transformers[sentencepiece]<4.50,>=4.38.0
45
+ Requires-Dist: torch<2.10,>=2.6
46
+ Requires-Dist: lightning<2.6,>=2.5.1
47
+ Requires-Dist: transformers[sentencepiece]<4.58,>=4.51.0
48
48
  Requires-Dist: accelerate<2.0,>=0.34.0
49
49
  Requires-Dist: fsspec[http]<=2025.3
50
50
  Requires-Dist: requests<3,>=2.30
@@ -52,14 +52,14 @@ Requires-Dist: jsonschema<4.24,>=4.18
52
52
  Requires-Dist: seqeval<1.3.0,>=1.2.2
53
53
  Requires-Dist: evaluate<0.5.0,>=0.4.0
54
54
  Requires-Dist: timm<1.0.7,>=0.9.5
55
- Requires-Dist: torchvision<0.23.0,>=0.21.0
55
+ Requires-Dist: torchvision<0.25.0,>=0.21.0
56
56
  Requires-Dist: scikit-image<0.26.0,>=0.19.1
57
57
  Requires-Dist: text-unidecode<1.4,>=1.3
58
58
  Requires-Dist: torchmetrics<1.8,>=1.2.0
59
59
  Requires-Dist: omegaconf<2.4.0,>=2.1.1
60
- Requires-Dist: autogluon.core[raytune]==1.4.1b20251119
61
- Requires-Dist: autogluon.features==1.4.1b20251119
62
- Requires-Dist: autogluon.common==1.4.1b20251119
60
+ Requires-Dist: autogluon.core[raytune]==1.5.1b20260112
61
+ Requires-Dist: autogluon.features==1.5.1b20260112
62
+ Requires-Dist: autogluon.common==1.5.1b20260112
63
63
  Requires-Dist: pytorch-metric-learning<2.9,>=1.3.0
64
64
  Requires-Dist: nlpaug<1.2.0,>=1.1.10
65
65
  Requires-Dist: nltk<3.10,>=3.4.5
@@ -73,11 +73,11 @@ Requires-Dist: pdf2image<1.19,>=1.17.0
73
73
  Provides-Extra: tests
74
74
  Requires-Dist: ruff; extra == "tests"
75
75
  Requires-Dist: datasets<3.6.0,>=2.16.0; extra == "tests"
76
- Requires-Dist: onnx<1.16.2,>=1.13.0; platform_system == "Windows" and extra == "tests"
77
- Requires-Dist: onnx<1.18.0,>=1.13.0; platform_system != "Windows" and extra == "tests"
78
- Requires-Dist: onnxruntime<1.22.0,>=1.17.0; extra == "tests"
79
- Requires-Dist: onnxruntime-gpu<1.22.0,>=1.17.0; (platform_system != "Darwin" and platform_machine != "aarch64") and extra == "tests"
80
76
  Requires-Dist: tensorrt<10.9.1,>=8.6.0; (platform_system == "Linux" and python_version < "3.11") and extra == "tests"
77
+ Requires-Dist: onnx!=1.16.2,<1.21.0,>=1.13.0; platform_system == "Windows" and extra == "tests"
78
+ Requires-Dist: onnx<1.21.0,>=1.13.0; platform_system != "Windows" and extra == "tests"
79
+ Requires-Dist: onnxruntime<1.24.0,>=1.17.0; extra == "tests"
80
+ Requires-Dist: onnxruntime-gpu<1.24.0,>=1.17.0; (platform_system != "Darwin" and platform_machine != "aarch64") and extra == "tests"
81
81
  Dynamic: author
82
82
  Dynamic: classifier
83
83
  Dynamic: description
@@ -100,7 +100,7 @@ Dynamic: summary
100
100
 
101
101
  [![Latest Release](https://img.shields.io/github/v/release/autogluon/autogluon)](https://github.com/autogluon/autogluon/releases)
102
102
  [![Conda Forge](https://img.shields.io/conda/vn/conda-forge/autogluon.svg)](https://anaconda.org/conda-forge/autogluon)
103
- [![Python Versions](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)](https://pypi.org/project/autogluon/)
103
+ [![Python Versions](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue)](https://pypi.org/project/autogluon/)
104
104
  [![Downloads](https://pepy.tech/badge/autogluon/month)](https://pepy.tech/project/autogluon)
105
105
  [![GitHub license](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](./LICENSE)
106
106
  [![Discord](https://img.shields.io/discord/1043248669505368144?color=7289da&label=Discord&logo=discord&logoColor=ffffff)](https://discord.gg/wjUmjqAc2N)
@@ -117,7 +117,7 @@ AutoGluon, developed by AWS AI, automates machine learning tasks enabling you to
117
117
 
118
118
  ## 💾 Installation
119
119
 
120
- AutoGluon is supported on Python 3.9 - 3.12 and is available on Linux, MacOS, and Windows.
120
+ AutoGluon is supported on Python 3.10 - 3.13 and is available on Linux, MacOS, and Windows.
121
121
 
122
122
  You can install AutoGluon with:
123
123
 
@@ -164,7 +164,10 @@ Below is a curated list of recent tutorials and talks on AutoGluon. A comprehens
164
164
  - [Benchmarking Multimodal AutoML for Tabular Data with Text Fields](https://datasets-benchmarks-proceedings.neurips.cc/paper/2021/file/9bf31c7ff062936a96d3c8bd1f8f2ff3-Paper-round2.pdf) (*NeurIPS*, 2021) ([BibTeX](CITING.md#autogluonmultimodal))
165
165
  - [XTab: Cross-table Pretraining for Tabular Transformers](https://proceedings.mlr.press/v202/zhu23k/zhu23k.pdf) (*ICML*, 2023)
166
166
  - [AutoGluon-TimeSeries: AutoML for Probabilistic Time Series Forecasting](https://arxiv.org/abs/2308.05566) (*AutoML Conf*, 2023) ([BibTeX](CITING.md#autogluontimeseries))
167
- - [TabRepo: A Large Scale Repository of Tabular Model Evaluations and its AutoML Applications](https://arxiv.org/pdf/2311.02971.pdf) (*Under Review*, 2024)
167
+ - [TabRepo: A Large Scale Repository of Tabular Model Evaluations and its AutoML Applications](https://arxiv.org/pdf/2311.02971.pdf) (*AutoML Conf*, 2024)
168
+ - [AutoGluon-Multimodal (AutoMM): Supercharging Multimodal AutoML with Foundation Models](https://arxiv.org/pdf/2404.16233) (*AutoML Conf*, 2024) ([BibTeX](CITING.md#autogluonmultimodal))
169
+ - [Multi-layer Stack Ensembles for Time Series Forecasting](https://arxiv.org/abs/2511.15350) (*AutoML Conf*, 2025) ([BibTeX](CITING.md#autogluontimeseries))
170
+ - [Chronos-2: From Univariate to Universal Forecasting](https://arxiv.org/abs/2510.15821) (*Arxiv*, 2025) ([BibTeX](CITING.md#autogluontimeseries))
168
171
 
169
172
  ### Articles
170
173
  - [AutoGluon-TimeSeries: Every Time Series Forecasting Model In One Library](https://towardsdatascience.com/autogluon-timeseries-every-time-series-forecasting-model-in-one-library-29a3bf6879db) (*Towards Data Science*, Jan 2024)
@@ -7,7 +7,7 @@
7
7
 
8
8
  [![Latest Release](https://img.shields.io/github/v/release/autogluon/autogluon)](https://github.com/autogluon/autogluon/releases)
9
9
  [![Conda Forge](https://img.shields.io/conda/vn/conda-forge/autogluon.svg)](https://anaconda.org/conda-forge/autogluon)
10
- [![Python Versions](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)](https://pypi.org/project/autogluon/)
10
+ [![Python Versions](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue)](https://pypi.org/project/autogluon/)
11
11
  [![Downloads](https://pepy.tech/badge/autogluon/month)](https://pepy.tech/project/autogluon)
12
12
  [![GitHub license](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](./LICENSE)
13
13
  [![Discord](https://img.shields.io/discord/1043248669505368144?color=7289da&label=Discord&logo=discord&logoColor=ffffff)](https://discord.gg/wjUmjqAc2N)
@@ -24,7 +24,7 @@ AutoGluon, developed by AWS AI, automates machine learning tasks enabling you to
24
24
 
25
25
  ## 💾 Installation
26
26
 
27
- AutoGluon is supported on Python 3.9 - 3.12 and is available on Linux, MacOS, and Windows.
27
+ AutoGluon is supported on Python 3.10 - 3.13 and is available on Linux, MacOS, and Windows.
28
28
 
29
29
  You can install AutoGluon with:
30
30
 
@@ -71,7 +71,10 @@ Below is a curated list of recent tutorials and talks on AutoGluon. A comprehens
71
71
  - [Benchmarking Multimodal AutoML for Tabular Data with Text Fields](https://datasets-benchmarks-proceedings.neurips.cc/paper/2021/file/9bf31c7ff062936a96d3c8bd1f8f2ff3-Paper-round2.pdf) (*NeurIPS*, 2021) ([BibTeX](CITING.md#autogluonmultimodal))
72
72
  - [XTab: Cross-table Pretraining for Tabular Transformers](https://proceedings.mlr.press/v202/zhu23k/zhu23k.pdf) (*ICML*, 2023)
73
73
  - [AutoGluon-TimeSeries: AutoML for Probabilistic Time Series Forecasting](https://arxiv.org/abs/2308.05566) (*AutoML Conf*, 2023) ([BibTeX](CITING.md#autogluontimeseries))
74
- - [TabRepo: A Large Scale Repository of Tabular Model Evaluations and its AutoML Applications](https://arxiv.org/pdf/2311.02971.pdf) (*Under Review*, 2024)
74
+ - [TabRepo: A Large Scale Repository of Tabular Model Evaluations and its AutoML Applications](https://arxiv.org/pdf/2311.02971.pdf) (*AutoML Conf*, 2024)
75
+ - [AutoGluon-Multimodal (AutoMM): Supercharging Multimodal AutoML with Foundation Models](https://arxiv.org/pdf/2404.16233) (*AutoML Conf*, 2024) ([BibTeX](CITING.md#autogluonmultimodal))
76
+ - [Multi-layer Stack Ensembles for Time Series Forecasting](https://arxiv.org/abs/2511.15350) (*AutoML Conf*, 2025) ([BibTeX](CITING.md#autogluontimeseries))
77
+ - [Chronos-2: From Univariate to Universal Forecasting](https://arxiv.org/abs/2510.15821) (*arXiv*, 2025) ([BibTeX](CITING.md#autogluontimeseries))
75
78
 
76
79
  ### Articles
77
80
  - [AutoGluon-TimeSeries: Every Time Series Forecasting Model In One Library](https://towardsdatascience.com/autogluon-timeseries-every-time-series-forecasting-model-in-one-library-29a3bf6879db) (*Towards Data Science*, Jan 2024)
@@ -41,7 +41,7 @@ install_requires = [
41
41
  "seqeval>=1.2.2,<1.3.0",
42
42
  "evaluate>=0.4.0,<0.5.0",
43
43
  "timm>=0.9.5,<1.0.7",
44
- "torchvision>=0.21.0,<0.23.0",
44
+ "torchvision>=0.21.0,<0.25.0",
45
45
  "scikit-image>=0.19.1,<0.26.0",
46
46
  "text-unidecode>=1.3,<1.4",
47
47
  "torchmetrics>=1.2.0,<1.8",
@@ -66,11 +66,14 @@ install_requires = ag.get_dependency_version_ranges(install_requires)
66
66
  tests_require = [
67
67
  "ruff",
68
68
  "datasets>=2.16.0,<3.6.0",
69
- "onnx>=1.13.0,<1.16.2;platform_system=='Windows'", # cap at 1.16.1 for issue https://github.com/onnx/onnx/issues/6267
70
- "onnx>=1.13.0,<1.18.0;platform_system!='Windows'",
71
- "onnxruntime>=1.17.0,<1.22.0", # install for gpu system due to https://github.com/autogluon/autogluon/issues/3804
72
- "onnxruntime-gpu>=1.17.0,<1.22.0;platform_system!='Darwin' and platform_machine!='aarch64'",
73
69
  "tensorrt>=8.6.0,<10.9.1;platform_system=='Linux' and python_version<'3.11'",
70
+ # Sync ONNX requirements with tabular/setup.py
71
+ "onnx>=1.13.0,!=1.16.2,<1.21.0;platform_system=='Windows'", # exclude 1.16.2 for issue https://github.com/onnx/onnx/issues/6267
72
+ "onnx>=1.13.0,<1.21.0;platform_system!='Windows'",
73
+ # For macOS, there isn't an onnxruntime-gpu package installed with skl2onnx.
74
+ # Therefore, we install onnxruntime explicitly here just for macOS.
75
+ "onnxruntime>=1.17.0,<1.24.0",
76
+ "onnxruntime-gpu>=1.17.0,<1.24.0; platform_system != 'Darwin' and platform_machine != 'aarch64'",
74
77
  ]
75
78
 
76
79
  extras_require = {"tests": tests_require}
@@ -3,14 +3,14 @@ from .dataset import BaseDataset
3
3
  from .dataset_mmlab import MultiImageMixDataset
4
4
  from .infer_types import (
5
5
  infer_column_types,
6
+ infer_ner_column_type,
6
7
  infer_output_shape,
7
8
  infer_problem_type,
8
9
  infer_rois_column_type,
9
10
  is_image_column,
10
11
  )
11
- from .mixup import MixupModule
12
- from .infer_types import infer_column_types, infer_output_shape, infer_problem_type, is_image_column, infer_ner_column_type
13
12
  from .label_encoder import CustomLabelEncoder, NerLabelEncoder
13
+ from .mixup import MixupModule
14
14
  from .preprocess_dataframe import MultiModalFeaturePreprocessor
15
15
  from .process_categorical import CategoricalProcessor
16
16
  from .process_document import DocumentProcessor
@@ -290,7 +290,7 @@ class Mosaic(BaseTransform):
290
290
  prob: float = 1.0,
291
291
  ) -> None:
292
292
  assert isinstance(img_scale, tuple)
293
- assert 0 <= prob <= 1.0, "The probability should be in range [0,1]. " f"got {prob}."
293
+ assert 0 <= prob <= 1.0, f"The probability should be in range [0,1]. got {prob}."
294
294
 
295
295
  log_img_scale(img_scale, skip_square=True, shape_order="wh")
296
296
  self.img_scale = img_scale
@@ -304,11 +304,24 @@ def is_document_image_column(
304
304
  col_name: str,
305
305
  image_type: Optional[str] = IMAGE_PATH,
306
306
  sample_m: Optional[int] = 10,
307
- text_len_threshold: Optional[int] = 100,
307
+ min_text_len_threshold: Optional[int] = 200,
308
+ text_density_threshold: Optional[float] = 0.001,
309
+ min_line_count: Optional[int] = 3,
310
+ min_document_ratio: Optional[float] = 0.8,
308
311
  ) -> bool:
309
312
  """
310
313
  Identify if a column is a document image column.
311
314
 
315
+ Document images are images that primarily contain text (e.g., scanned documents,
316
+ screenshots of text, PDFs rendered as images). Regular photographs, maps,
317
+ charts with labels, or images with watermarks/captions should NOT be
318
+ classified as document images.
319
+
320
+ The detection uses multiple heuristics:
321
+ 1. Minimum absolute text length (short text like watermarks is ignored)
322
+ 2. Text density relative to image size (documents have high text-to-pixel ratio)
323
+ 3. Line count (documents typically have multiple lines of text)
324
+
312
325
  Parameters
313
326
  ----------
314
327
  data
@@ -319,46 +332,90 @@ def is_document_image_column(
319
332
  The image type to check. Set to IMAGE_PATH by default.
320
333
  sample_m
321
334
  Number of sample images used to check if images are document images.
322
- text_len_threshold
323
- If the average text length is longer than text_len_threshold, the images will be considered as document images.
335
+ min_text_len_threshold
336
+ Minimum text length to even consider an image as a potential document.
337
+ This filters out images with just watermarks or short captions.
338
+ text_density_threshold
339
+ Minimum ratio of (text_characters / image_pixels) to consider as document.
340
+ Documents typically have much higher text density than photos with labels.
341
+ min_line_count
342
+ Minimum number of non-empty text lines expected in a document.
343
+ min_document_ratio
344
+ Minimum ratio of images that must be classified as documents for the
345
+ entire column to be treated as a document column.
346
+
324
347
  Returns
325
348
  -------
326
349
  Whether the column is a document image column.
327
350
  """
351
+ if data.empty:
352
+ return False
328
353
 
329
- # TODO: Add support for other types (e.g., pdf) of document.
330
-
331
- words_len = []
332
354
  if len(data) > sample_m:
333
- # Sample to speed-up type inference
334
355
  data = data.sample(n=sample_m, random_state=0)
335
- failure_count = 0
356
+
357
+ document_count = 0
358
+ total_processed = 0
359
+
336
360
  for images in data:
337
- success = False
361
+ if images is None:
362
+ continue
363
+
338
364
  if not isinstance(images, list):
339
365
  images = [images]
366
+
340
367
  for per_image in images:
368
+ if not isinstance(per_image, str):
369
+ total_processed += 1
370
+ continue
371
+
341
372
  try:
342
- # convert images to string
343
- with PIL.Image.open(per_image) as doc_image:
344
- words = pytesseract.image_to_string(doc_image)
345
- words_len.append(len(words))
373
+ with PIL.Image.open(per_image) as img:
374
+ width, height = img.size
375
+ total_pixels = width * height
376
+
377
+ ocr_text = pytesseract.image_to_string(img)
378
+ text_length = len(ocr_text.strip())
379
+
380
+ total_processed += 1
381
+
382
+ # Heuristic 1: Minimum absolute text length
383
+ # Filters out watermarks, copyright notices, short captions
384
+ if text_length < min_text_len_threshold:
385
+ continue
386
+
387
+ # Heuristic 2: Text density (characters per pixel)
388
+ # Documents have dense text; photos with small labels don't
389
+ text_density = text_length / total_pixels
390
+ if text_density < text_density_threshold:
391
+ continue
392
+
393
+ # Heuristic 3: Line count
394
+ # Documents have multiple lines; watermarks are 1-2 lines
395
+ lines = [line for line in ocr_text.split("\n") if line.strip()]
396
+ if len(lines) < min_line_count:
397
+ continue
398
+
399
+ # Passed all heuristics - this looks like a document
400
+ document_count += 1
401
+
346
402
  except Exception as e:
347
- words_len.append(0)
348
- success = False
349
- break
350
- success = True
351
- if not success:
352
- failure_count += 1
403
+ logger.debug(f"Failed to process image {per_image}: {e}")
404
+ total_processed += 1
353
405
 
354
- if (1 - failure_count / sample_m) >= 0.8:
355
- logger.debug(f"Average length of words of this dataset is {sum(words_len) / len(words_len)}.")
356
- if sum(words_len) / len(words_len) > text_len_threshold:
357
- return True
358
- else:
359
- return False
360
- else:
361
- False
406
+ if total_processed == 0:
407
+ return False
408
+
409
+ document_ratio = document_count / total_processed
410
+ is_document_column = document_ratio >= min_document_ratio
411
+
412
+ logger.debug(
413
+ f"Column '{col_name}': {document_count}/{total_processed} images "
414
+ f"({document_ratio:.1%}) classified as documents. "
415
+ f"Column type: {'document' if is_document_column else 'regular'} images."
416
+ )
417
+
418
+ return is_document_column
362
419
 
363
420
 
364
421
  def is_text_column(data: pd.Series) -> bool:
@@ -769,8 +826,7 @@ def infer_output_shape(
769
826
  if problem_type in [BINARY, MULTICLASS, REGRESSION, CLASSIFICATION]:
770
827
  class_num = len(data[label_column].unique())
771
828
  err_msg = (
772
- f"Problem type is '{problem_type}' while the number of "
773
- f"unique values in the label column is {class_num}."
829
+ f"Problem type is '{problem_type}' while the number of unique values in the label column is {class_num}."
774
830
  )
775
831
  if problem_type == BINARY:
776
832
  if class_num != 2:
@@ -456,9 +456,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
456
456
  text_types
457
457
  The column types of these text data, e.g., text or text_identifier.
458
458
  """
459
- assert (
460
- self._fit_called or self._fit_x_called
461
- ), "You will need to first call preprocessor.fit_x() before calling preprocessor.transform_text."
459
+ assert self._fit_called or self._fit_x_called, (
460
+ "You will need to first call preprocessor.fit_x() before calling preprocessor.transform_text."
461
+ )
462
462
  text_features = {}
463
463
  text_types = {}
464
464
  for col_name in self._text_feature_names:
@@ -508,9 +508,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
508
508
  image_types
509
509
  The column types of these image data, e.g., image_path or image_identifier.
510
510
  """
511
- assert (
512
- self._fit_called or self._fit_x_called
513
- ), "You will need to first call preprocessor.fit_x() before calling preprocessor.transform_rois."
511
+ assert self._fit_called or self._fit_x_called, (
512
+ "You will need to first call preprocessor.fit_x() before calling preprocessor.transform_rois."
513
+ )
514
514
 
515
515
  x = self.transform_image(df)
516
516
  ret_data = x[0]
@@ -552,9 +552,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
552
552
  image_types
553
553
  The column types of these image data, e.g., image_path or image_identifier.
554
554
  """
555
- assert (
556
- self._fit_called or self._fit_x_called
557
- ), "You will need to first call preprocessor.fit_x() before calling preprocessor.transform_semantic_segmentation_img."
555
+ assert self._fit_called or self._fit_x_called, (
556
+ "You will need to first call preprocessor.fit_x() before calling preprocessor.transform_semantic_segmentation_img."
557
+ )
558
558
 
559
559
  ret_data = {}
560
560
  ret_type = {}
@@ -597,9 +597,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
597
597
  All the image data stored in a dictionary.
598
598
  image_types
599
599
  The column types of these image data, e.g., image_path, image_bytearray or image_identifier."""
600
- assert (
601
- self._fit_called or self._fit_x_called
602
- ), "You will need to first call preprocessor.fit_x() before calling preprocessor.transform_image."
600
+ assert self._fit_called or self._fit_x_called, (
601
+ "You will need to first call preprocessor.fit_x() before calling preprocessor.transform_image."
602
+ )
603
603
 
604
604
  image_features = {}
605
605
  image_types = {}
@@ -650,9 +650,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
650
650
  document_types
651
651
  The column types of these document data.
652
652
  """
653
- assert (
654
- self._fit_called or self._fit_x_called
655
- ), "You will need to first call preprocessor.fit_x() before calling preprocessor.transform_document."
653
+ assert self._fit_called or self._fit_x_called, (
654
+ "You will need to first call preprocessor.fit_x() before calling preprocessor.transform_document."
655
+ )
656
656
  document_features = {}
657
657
  document_types = {}
658
658
  for col_name in self._document_feature_names:
@@ -687,9 +687,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
687
687
  None
688
688
  The column types of numerical data, which is None currently since only one numerical type exists.
689
689
  """
690
- assert (
691
- self._fit_called or self._fit_x_called
692
- ), "You will need to first call preprocessor.fit before calling preprocessor.transform_numerical."
690
+ assert self._fit_called or self._fit_x_called, (
691
+ "You will need to first call preprocessor.fit before calling preprocessor.transform_numerical."
692
+ )
693
693
  numerical_features = {}
694
694
  for col_name in self._numerical_feature_names:
695
695
  generator = self._feature_generators[col_name]
@@ -720,9 +720,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
720
720
  None
721
721
  The column types of categorical data, which is None currently since only one categorical type exists.
722
722
  """
723
- assert (
724
- self._fit_called or self._fit_x_called
725
- ), "You will need to first call preprocessor.fit before calling preprocessor.transform_categorical."
723
+ assert self._fit_called or self._fit_x_called, (
724
+ "You will need to first call preprocessor.fit before calling preprocessor.transform_categorical."
725
+ )
726
726
  categorical_features = {}
727
727
  for col_name, num_category in self._categorical_num_categories.items():
728
728
  col_value = df[col_name]
@@ -758,9 +758,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
758
758
  label_types
759
759
  The label column types.
760
760
  """
761
- assert (
762
- self._fit_called or self._fit_y_called
763
- ), "You will need to first call preprocessor.fit_y() before calling preprocessor.transform_label."
761
+ assert self._fit_called or self._fit_y_called, (
762
+ "You will need to first call preprocessor.fit_y() before calling preprocessor.transform_label."
763
+ )
764
764
  # Creating deep copy of the DataFrame, which allows writable buffer to be created for the new df
765
765
  # This is needed for 1.4.1 < scikit-learn < 1.5.0, versions <=1.4.0 and >=1.5.1 do not need a writable buffer
766
766
  df = df.copy(deep=True)
@@ -784,9 +784,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
784
784
  self,
785
785
  df: pd.DataFrame,
786
786
  ) -> Tuple[Dict[str, NDArray], Dict[str, str]]:
787
- assert (
788
- self._fit_called or self._fit_x_called
789
- ), "You will need to first call preprocessor.fit_x() before calling preprocessor.transform_ner."
787
+ assert self._fit_called or self._fit_x_called, (
788
+ "You will need to first call preprocessor.fit_x() before calling preprocessor.transform_ner."
789
+ )
790
790
  ret_data, ret_type = {}, {}
791
791
  ner_text_features = {}
792
792
  ner_text_types = {}
@@ -831,12 +831,12 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
831
831
  -------
832
832
  Ground-truth labels ready to compute metric scores.
833
833
  """
834
- assert (
835
- self._fit_called or self._fit_y_called
836
- ), "You will need to first call preprocessor.fit_y() before calling preprocessor.transform_label_for_metric."
837
- assert (
838
- self._label_column in df.columns
839
- ), f"Label {self._label_column} is not in the data. Cannot perform evaluation without ground truth labels."
834
+ assert self._fit_called or self._fit_y_called, (
835
+ "You will need to first call preprocessor.fit_y() before calling preprocessor.transform_label_for_metric."
836
+ )
837
+ assert self._label_column in df.columns, (
838
+ f"Label {self._label_column} is not in the data. Cannot perform evaluation without ground truth labels."
839
+ )
840
840
  y_df = df[self._label_column]
841
841
  if self.label_type == CATEGORICAL:
842
842
  # need to encode to integer labels
@@ -875,9 +875,9 @@ class MultiModalFeaturePreprocessor(TransformerMixin, BaseEstimator):
875
875
  -------
876
876
  Predicted labels ready to compute metric scores.
877
877
  """
878
- assert (
879
- self._fit_called or self._fit_y_called
880
- ), "You will need to first call preprocessor.fit_y() before calling preprocessor.transform_prediction."
878
+ assert self._fit_called or self._fit_y_called, (
879
+ "You will need to first call preprocessor.fit_y() before calling preprocessor.transform_prediction."
880
+ )
881
881
 
882
882
  if self.label_type == CATEGORICAL:
883
883
  assert len(y_pred.shape) <= 2
@@ -32,9 +32,9 @@ class TemplateEngine:
32
32
  self.template_length = self.template_config.template_length
33
33
 
34
34
  if self.preset_templates:
35
- assert (
36
- len(self.preset_templates) == 2
37
- ), f"Preset templates has the wrong format. Needs to be [DATASET, SUBSET]."
35
+ assert len(self.preset_templates) == 2, (
36
+ f"Preset templates has the wrong format. Needs to be [DATASET, SUBSET]."
37
+ )
38
38
  dataset_templates = DatasetTemplates(self.preset_templates[0], self.preset_templates[1])
39
39
  current_templates = list(dataset_templates.templates.values())
40
40
  self.templates += current_templates[: self.num_templates]
@@ -210,8 +210,8 @@ def set_image_augmentation_space():
210
210
  def download_nltk():
211
211
  """
212
212
  Download required NLTK resources with singleton pattern to prevent multiple downloads.
213
-
214
- This function handles NLTK 3.9+ changes where resource names changed and
213
+
214
+ This function handles NLTK 3.9+ changes where resource names changed and
215
215
  the quiet=True parameter behavior was affected. Uses a global flag to ensure
216
216
  downloads happen only once even when TrivialAugment is instantiated multiple times.
217
217
  """
@@ -232,7 +232,6 @@ def download_nltk():
232
232
  try:
233
233
  nltk.data.find(resource_path)
234
234
  except LookupError:
235
-
236
235
  nltk.download(download_name, quiet=True)
237
236
  _nltk_downloaded = True
238
237
 
@@ -530,9 +530,9 @@ class BaseLearner(ExportMixin, DistillationMixin, RealtimeMixin):
530
530
  def fit_sanity_check(self):
531
531
  assert not self._resume or not self._is_hpo, "You can not resume training with HPO."
532
532
  if self._is_hpo and hasattr(self, "_teacher_learner") and self._teacher_learner is not None:
533
- assert isinstance(
534
- self._teacher_learner, str
535
- ), "HPO with distillation only supports passing a path to the learner."
533
+ assert isinstance(self._teacher_learner, str), (
534
+ "HPO with distillation only supports passing a path to the learner."
535
+ )
536
536
 
537
537
  def prepare_fit_args(
538
538
  self,
@@ -683,9 +683,9 @@ class BaseLearner(ExportMixin, DistillationMixin, RealtimeMixin):
683
683
  overrides=hyperparameters,
684
684
  )
685
685
  if self._model is None:
686
- assert (
687
- len(self._config.model.names) == 1
688
- ), f"Zero shot mode only supports using one model, but detects multiple models {self._config.model.names}"
686
+ assert len(self._config.model.names) == 1, (
687
+ f"Zero shot mode only supports using one model, but detects multiple models {self._config.model.names}"
688
+ )
689
689
  self._model = create_fusion_model(
690
690
  config=self._config,
691
691
  pretrained=self._pretrained,
@@ -836,8 +836,7 @@ class BaseLearner(ExportMixin, DistillationMixin, RealtimeMixin):
836
836
  )
837
837
  if mixup_active and (config.env.per_gpu_batch_size == 1 or config.env.per_gpu_batch_size % 2 == 1):
838
838
  warnings.warn(
839
- "The mixup is done on the batch."
840
- "The per_gpu_batch_size should be >1 and even for reasonable operation",
839
+ "The mixup is done on the batch.The per_gpu_batch_size should be >1 and even for reasonable operation",
841
840
  UserWarning,
842
841
  )
843
842
  return mixup_active, mixup_func
@@ -1053,9 +1052,9 @@ class BaseLearner(ExportMixin, DistillationMixin, RealtimeMixin):
1053
1052
  if (
1054
1053
  config.env.strategy == DEEPSPEED_OFFLOADING and num_gpus == 1 and DEEPSPEED_MODULE not in sys.modules
1055
1054
  ): # Offloading currently only tested for single GPU
1056
- assert (
1057
- version.parse(pl.__version__) >= version.parse(DEEPSPEED_MIN_PL_VERSION)
1058
- ), f"For DeepSpeed Offloading to work reliably you need at least lightning version {DEEPSPEED_MIN_PL_VERSION}, however, found {pl.__version__}. Please update your lightning version."
1055
+ assert version.parse(pl.__version__) >= version.parse(DEEPSPEED_MIN_PL_VERSION), (
1056
+ f"For DeepSpeed Offloading to work reliably you need at least lightning version {DEEPSPEED_MIN_PL_VERSION}, however, found {pl.__version__}. Please update your lightning version."
1057
+ )
1059
1058
  from ..optim.deepspeed import CustomDeepSpeedStrategy
1060
1059
 
1061
1060
  strategy = CustomDeepSpeedStrategy(
@@ -1909,15 +1908,15 @@ class BaseLearner(ExportMixin, DistillationMixin, RealtimeMixin):
1909
1908
  return_prob: Optional[bool] = False,
1910
1909
  ):
1911
1910
  query_embeddings = self.extract_embedding(query_data, as_tensor=True)
1912
- assert (
1913
- len(query_embeddings) == 1
1914
- ), f"Multiple embedding types `{query_embeddings.keys()}` exist in query data. Please reduce them to one type."
1911
+ assert len(query_embeddings) == 1, (
1912
+ f"Multiple embedding types `{query_embeddings.keys()}` exist in query data. Please reduce them to one type."
1913
+ )
1915
1914
  query_embeddings = list(query_embeddings.values())[0]
1916
1915
 
1917
1916
  candidate_embeddings = self.extract_embedding(candidate_data, as_tensor=True)
1918
- assert (
1919
- len(candidate_embeddings) == 1
1920
- ), f"Multiple embedding types `{candidate_embeddings.keys()}` exist in candidate data. Please reduce them to one type."
1917
+ assert len(candidate_embeddings) == 1, (
1918
+ f"Multiple embedding types `{candidate_embeddings.keys()}` exist in candidate data. Please reduce them to one type."
1919
+ )
1921
1920
  candidate_embeddings = list(candidate_embeddings.values())[0]
1922
1921
 
1923
1922
  if return_prob:
@@ -2157,9 +2156,9 @@ class BaseLearner(ExportMixin, DistillationMixin, RealtimeMixin):
2157
2156
  state_dict = {k: v for k, v in state_dict.items() if k not in buffer_names_to_filter}
2158
2157
 
2159
2158
  load_result = self._model.load_state_dict(state_dict, strict=strict)
2160
- assert (
2161
- len(load_result.unexpected_keys) == 0
2162
- ), f"Load model failed, unexpected keys {load_result.unexpected_keys.__str__()}"
2159
+ assert len(load_result.unexpected_keys) == 0, (
2160
+ f"Load model failed, unexpected keys {load_result.unexpected_keys.__str__()}"
2161
+ )
2163
2162
 
2164
2163
  @staticmethod
2165
2164
  def _replace_model_name_prefix(