autogluon.tabular 1.3.2b20250708__tar.gz → 1.3.2b20250710__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/PKG-INFO +4 -1
  2. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/setup.py +11 -2
  3. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/__init__.py +3 -0
  4. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/catboost/callbacks.py +3 -2
  5. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/catboost/catboost_model.py +2 -2
  6. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/catboost/catboost_utils.py +7 -3
  7. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fastainn/tabular_nn_fastai.py +3 -3
  8. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lgb/lgb_model.py +2 -2
  9. autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/realmlp/realmlp_model.py +347 -0
  10. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/rf/rf_model.py +2 -1
  11. autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabicl/tabicl_model.py +174 -0
  12. autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabm/_tabm_internal.py +544 -0
  13. autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabm/rtdl_num_embeddings.py +807 -0
  14. autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabm/tabm_model.py +275 -0
  15. autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabm/tabm_reference.py +627 -0
  16. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +3 -3
  17. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +3 -3
  18. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/xgboost/xgboost_model.py +2 -2
  19. autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/xt/__init__.py +0 -0
  20. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/predictor/predictor.py +5 -3
  21. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/registry/_ag_model_registry.py +6 -0
  22. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/testing/fit_helper.py +27 -25
  23. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/testing/generate_datasets.py +7 -0
  24. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/trainer/abstract_trainer.py +1 -1
  25. autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/trainer/model_presets/__init__.py +0 -0
  26. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/trainer/model_presets/presets.py +10 -1
  27. autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/tuning/__init__.py +0 -0
  28. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/version.py +1 -1
  29. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon.tabular.egg-info/PKG-INFO +4 -1
  30. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon.tabular.egg-info/SOURCES.txt +9 -0
  31. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon.tabular.egg-info/requires.txt +23 -12
  32. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/setup.cfg +0 -0
  33. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/__init__.py +0 -0
  34. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/configs/__init__.py +0 -0
  35. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/configs/config_helper.py +0 -0
  36. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/configs/feature_generator_presets.py +0 -0
  37. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/configs/hyperparameter_configs.py +0 -0
  38. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/configs/presets_configs.py +0 -0
  39. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/configs/zeroshot/__init__.py +0 -0
  40. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +0 -0
  41. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/experimental/__init__.py +0 -0
  42. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/experimental/_scikit_mixin.py +0 -0
  43. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/experimental/_tabular_classifier.py +0 -0
  44. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/experimental/_tabular_regressor.py +0 -0
  45. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/experimental/plot_leaderboard.py +0 -0
  46. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/learner/__init__.py +0 -0
  47. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/learner/abstract_learner.py +0 -0
  48. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/learner/default_learner.py +0 -0
  49. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/_utils/__init__.py +0 -0
  50. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/_utils/rapids_utils.py +0 -0
  51. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/_utils/torch_utils.py +0 -0
  52. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/automm/__init__.py +0 -0
  53. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/automm/automm_model.py +0 -0
  54. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/automm/ft_transformer.py +0 -0
  55. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/catboost/__init__.py +0 -0
  56. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/catboost/catboost_softclass_utils.py +0 -0
  57. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/catboost/hyperparameters/__init__.py +0 -0
  58. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/catboost/hyperparameters/parameters.py +0 -0
  59. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/catboost/hyperparameters/searchspaces.py +0 -0
  60. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fastainn/__init__.py +0 -0
  61. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fastainn/callbacks.py +0 -0
  62. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fastainn/fastai_helpers.py +0 -0
  63. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fastainn/hyperparameters/__init__.py +0 -0
  64. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fastainn/hyperparameters/parameters.py +0 -0
  65. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +0 -0
  66. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fastainn/imports_helper.py +0 -0
  67. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fastainn/quantile_helpers.py +0 -0
  68. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fasttext/__init__.py +0 -0
  69. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fasttext/fasttext_model.py +0 -0
  70. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fasttext/hyperparameters/__init__.py +0 -0
  71. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/fasttext/hyperparameters/parameters.py +0 -0
  72. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/image_prediction/__init__.py +0 -0
  73. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/image_prediction/image_predictor.py +0 -0
  74. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/imodels/__init__.py +0 -0
  75. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/imodels/imodels_models.py +0 -0
  76. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/knn/__init__.py +0 -0
  77. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/knn/_knn_loo_variants.py +0 -0
  78. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/knn/knn_model.py +0 -0
  79. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/knn/knn_rapids_model.py +0 -0
  80. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/knn/knn_utils.py +0 -0
  81. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lgb/__init__.py +0 -0
  82. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lgb/callbacks.py +0 -0
  83. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lgb/hyperparameters/__init__.py +0 -0
  84. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lgb/hyperparameters/parameters.py +0 -0
  85. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +0 -0
  86. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lgb/lgb_utils.py +0 -0
  87. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lr/__init__.py +0 -0
  88. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lr/hyperparameters/__init__.py +0 -0
  89. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lr/hyperparameters/parameters.py +0 -0
  90. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lr/hyperparameters/searchspaces.py +0 -0
  91. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lr/lr_model.py +0 -0
  92. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lr/lr_preprocessing_utils.py +0 -0
  93. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/lr/lr_rapids_model.py +0 -0
  94. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/rf → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/realmlp}/__init__.py +0 -0
  95. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/rf/compilers → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/rf}/__init__.py +0 -0
  96. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabpfnmix → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/rf/compilers}/__init__.py +0 -0
  97. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/rf/compilers/native.py +0 -0
  98. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/rf/compilers/onnx.py +0 -0
  99. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/rf/rf_quantile.py +0 -0
  100. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/rf/rf_rapids_model.py +0 -0
  101. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabpfnmix/_internal → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabicl}/__init__.py +0 -0
  102. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabpfnmix/_internal/config → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabm}/__init__.py +0 -0
  103. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfn/__init__.py +0 -0
  104. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfn/tabpfn_model.py +0 -0
  105. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabpfnmix/_internal/core → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabpfnmix}/__init__.py +0 -0
  106. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabpfnmix/_internal/data → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabpfnmix/_internal}/__init__.py +0 -0
  107. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabpfnmix/_internal/models → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabpfnmix/_internal/config}/__init__.py +0 -0
  108. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/config/config_run.py +0 -0
  109. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabpfnmix/_internal/models/foundation → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabpfnmix/_internal/core}/__init__.py +0 -0
  110. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +0 -0
  111. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +0 -0
  112. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +0 -0
  113. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -0
  114. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -0
  115. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +0 -0
  116. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +0 -0
  117. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +0 -0
  118. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +0 -0
  119. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabpfnmix/_internal/results → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabpfnmix/_internal/data}/__init__.py +0 -0
  120. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +0 -0
  121. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +0 -0
  122. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabular_nn → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabpfnmix/_internal/models}/__init__.py +0 -0
  123. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabular_nn/compilers → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabpfnmix/_internal/models/foundation}/__init__.py +0 -0
  124. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +0 -0
  125. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +0 -0
  126. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabular_nn/hyperparameters → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabpfnmix/_internal/results}/__init__.py +0 -0
  127. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +0 -0
  128. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +0 -0
  129. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +0 -0
  130. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabular_nn/torch → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabular_nn}/__init__.py +0 -0
  131. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/tabular_nn/utils → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabular_nn/compilers}/__init__.py +0 -0
  132. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabular_nn/compilers/native.py +0 -0
  133. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabular_nn/compilers/onnx.py +0 -0
  134. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/text_prediction → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabular_nn/hyperparameters}/__init__.py +0 -0
  135. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabular_nn/hyperparameters/parameters.py +0 -0
  136. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabular_nn/hyperparameters/searchspaces.py +0 -0
  137. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/xgboost → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabular_nn/torch}/__init__.py +0 -0
  138. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +0 -0
  139. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +0 -0
  140. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/xgboost/hyperparameters → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/tabular_nn/utils}/__init__.py +0 -0
  141. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +0 -0
  142. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +0 -0
  143. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +0 -0
  144. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/models/xt → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/text_prediction}/__init__.py +0 -0
  145. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/text_prediction/text_prediction_v1_model.py +0 -0
  146. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/trainer/model_presets → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/xgboost}/__init__.py +0 -0
  147. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/xgboost/callbacks.py +0 -0
  148. {autogluon.tabular-1.3.2b20250708/src/autogluon/tabular/tuning → autogluon.tabular-1.3.2b20250710/src/autogluon/tabular/models/xgboost/hyperparameters}/__init__.py +0 -0
  149. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/xgboost/hyperparameters/parameters.py +0 -0
  150. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/xgboost/hyperparameters/searchspaces.py +0 -0
  151. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/xgboost/xgboost_utils.py +0 -0
  152. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/models/xt/xt_model.py +0 -0
  153. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/predictor/__init__.py +0 -0
  154. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/predictor/interpretable_predictor.py +0 -0
  155. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/registry/__init__.py +0 -0
  156. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/registry/_model_registry.py +0 -0
  157. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/testing/__init__.py +0 -0
  158. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/testing/model_fit_helper.py +0 -0
  159. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/trainer/__init__.py +0 -0
  160. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/trainer/auto_trainer.py +0 -0
  161. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/trainer/model_presets/presets_distill.py +0 -0
  162. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon/tabular/tuning/feature_pruner.py +0 -0
  163. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon.tabular.egg-info/dependency_links.txt +0 -0
  164. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon.tabular.egg-info/namespace_packages.txt +0 -0
  165. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon.tabular.egg-info/top_level.txt +0 -0
  166. {autogluon.tabular-1.3.2b20250708 → autogluon.tabular-1.3.2b20250710}/src/autogluon.tabular.egg-info/zip-safe +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.tabular
3
- Version: 1.3.2b20250708
3
+ Version: 1.3.2b20250710
4
4
  Summary: Fast and Accurate ML in 3 Lines of Code
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -37,9 +37,12 @@ Description-Content-Type: text/markdown
37
37
  Provides-Extra: lightgbm
38
38
  Provides-Extra: catboost
39
39
  Provides-Extra: xgboost
40
+ Provides-Extra: realmlp
40
41
  Provides-Extra: fastai
42
+ Provides-Extra: tabm
41
43
  Provides-Extra: tabpfn
42
44
  Provides-Extra: tabpfnmix
45
+ Provides-Extra: tabicl
43
46
  Provides-Extra: ray
44
47
  Provides-Extra: skex
45
48
  Provides-Extra: imodels
@@ -43,11 +43,17 @@ extras_require = {
43
43
  "xgboost": [
44
44
  "xgboost>=2.0,<3.1", # <{N+1} upper cap, where N is the latest released minor version
45
45
  ],
46
+ "realmlp": [
47
+ "pytabkit>=1.5,<1.6",
48
+ ],
46
49
  "fastai": [
47
50
  "spacy<3.9",
48
51
  "torch", # version range defined in `core/_setup_utils.py`
49
52
  "fastai>=2.3.1,<2.9", # <{N+1} upper cap, where N is the latest released minor version
50
53
  ],
54
+ "tabm": [
55
+ "torch", # version range defined in `core/_setup_utils.py`
56
+ ],
51
57
  "tabpfn": [
52
58
  # versions below 0.1.11 are yanked, not compatible with >=2.0.0 yet
53
59
  "tabpfn>=0.1.11,<2.0", # after v2 compatibility is ensured, should be <{N+1} upper cap, where N is the latest released minor version
@@ -57,6 +63,9 @@ extras_require = {
57
63
  "huggingface_hub[torch]", # Only needed for HuggingFace downloads, currently uncapped to minimize future conflicts.
58
64
  "einops>=0.7,<0.9",
59
65
  ],
66
+ "tabicl": [
67
+ "tabicl>=0.1.3,<0.2", # 0.1.3 added a major bug fix to multithreading.
68
+ ],
60
69
  "ray": [
61
70
  f"{ag.PACKAGE_NAME}.core[all]=={version}",
62
71
  ],
@@ -94,14 +103,14 @@ else:
94
103
  # TODO: v1.0: Rename `all` to `core`, make `all` contain everything.
95
104
  all_requires = []
96
105
  # TODO: Consider adding 'skex' to 'all'
97
- for extra_package in ["lightgbm", "catboost", "xgboost", "fastai", "tabpfnmix", "ray"]:
106
+ for extra_package in ["lightgbm", "catboost", "xgboost", "fastai", "tabm", "tabpfnmix", "realmlp", "ray"]:
98
107
  all_requires += extras_require[extra_package]
99
108
  all_requires = list(set(all_requires))
100
109
  extras_require["all"] = all_requires
101
110
 
102
111
 
103
112
  test_requires = []
104
- for test_package in ["tabpfnmix", "imodels", "skl2onnx"]:
113
+ for test_package in ["tabpfnmix", "imodels", "skl2onnx", "tabicl"]:
105
114
  test_requires += extras_require[test_package]
106
115
  extras_require["tests"] = test_requires
107
116
  install_requires = ag.get_dependency_version_ranges(install_requires)
@@ -17,7 +17,10 @@ from .imodels.imodels_models import (
17
17
  from .knn.knn_model import KNNModel
18
18
  from .lgb.lgb_model import LGBModel
19
19
  from .lr.lr_model import LinearModel
20
+ from .realmlp.realmlp_model import RealMLPModel
20
21
  from .rf.rf_model import RFModel
22
+ from .tabicl.tabicl_model import TabICLModel
23
+ from .tabm.tabm_model import TabMModel
21
24
  from .tabpfn.tabpfn_model import TabPFNModel
22
25
  from .tabpfnmix.tabpfnmix_model import TabPFNMixModel
23
26
  from .tabular_nn.torch.tabular_nn_torch import TabularNeuralNetTorchModel
@@ -170,14 +170,15 @@ class EarlyStoppingCallback:
170
170
 
171
171
  self.eval_metric_name = eval_metric_name
172
172
  self.is_max_optimal = is_max_optimal
173
- self.is_quantile = self.eval_metric_name.startswith(CATBOOST_QUANTILE_PREFIX)
173
+ self.is_quantile = CATBOOST_QUANTILE_PREFIX in self.eval_metric_name
174
174
 
175
175
  def after_iteration(self, info):
176
176
  is_best_iter = False
177
177
  if self.is_quantile:
178
178
  # FIXME: CatBoost adds extra ',' in the metric name if quantile levels are not balanced
179
179
  # e.g., 'MultiQuantile:alpha=0.1,0.25,0.5,0.95' becomes 'MultiQuantile:alpha=0.1,,0.25,0.5,0.95'
180
- eval_metric_name = [k for k in info.metrics[self.compare_key] if k.startswith(CATBOOST_QUANTILE_PREFIX)][0]
180
+ # `'Quantile:' in k` catches both multiquantile (MultiQuantile:) and single-quantile mode (Quantile:)
181
+ eval_metric_name = [k for k in info.metrics[self.compare_key] if CATBOOST_QUANTILE_PREFIX in k][0]
181
182
  else:
182
183
  eval_metric_name = self.eval_metric_name
183
184
  cur_score = info.metrics[self.compare_key][eval_metric_name][-1]
@@ -350,8 +350,8 @@ class CatBoostModel(AbstractModel):
350
350
  return minimum_resources
351
351
 
352
352
  def _get_default_resources(self):
353
- # logical=False is faster in training
354
- num_cpus = ResourceManager.get_cpu_count_psutil(logical=False)
353
+ # only_physical_cores=True is faster in training
354
+ num_cpus = ResourceManager.get_cpu_count(only_physical_cores=True)
355
355
  num_gpus = 0
356
356
  return num_cpus, num_gpus
357
357
 
@@ -5,7 +5,7 @@ from autogluon.core.constants import BINARY, MULTICLASS, QUANTILE, REGRESSION, S
5
5
  logger = logging.getLogger(__name__)
6
6
 
7
7
 
8
- CATBOOST_QUANTILE_PREFIX = "MultiQuantile:"
8
+ CATBOOST_QUANTILE_PREFIX = "Quantile:"
9
9
 
10
10
 
11
11
  # TODO: Add weight support?
@@ -74,8 +74,12 @@ def get_catboost_metric_from_ag_metric(metric, problem_type, quantile_levels=Non
74
74
  raise AssertionError(f"quantile_levels must be provided for problem_type = {problem_type}")
75
75
  if not all(0 < q < 1 for q in quantile_levels):
76
76
  raise AssertionError(f"quantile_levels must fulfill 0 < q < 1, provided quantile_levels: {quantile_levels}")
77
- quantile_string = ",".join(str(q) for q in quantile_levels)
78
- metric_class = f"{CATBOOST_QUANTILE_PREFIX}alpha={quantile_string}"
77
+ # Loss function MultiQuantile: can only be used if len(quantile_levels) >= 2, otherwise we must use Quantile:
78
+ if len(quantile_levels) == 1:
79
+ metric_class = f"{CATBOOST_QUANTILE_PREFIX}alpha={quantile_levels[0]}"
80
+ else:
81
+ quantile_string = ",".join(str(q) for q in quantile_levels)
82
+ metric_class = f"Multi{CATBOOST_QUANTILE_PREFIX}alpha={quantile_string}"
79
83
  else:
80
84
  raise AssertionError(f"CatBoost does not support {problem_type} problem type.")
81
85
 
@@ -584,8 +584,8 @@ class NNFastAiTabularModel(AbstractModel):
584
584
  return default_auxiliary_params
585
585
 
586
586
  def _get_default_resources(self):
587
- # logical=False is faster in training
588
- num_cpus = ResourceManager.get_cpu_count_psutil(logical=False)
587
+ # only_physical_cores=True is faster in training
588
+ num_cpus = ResourceManager.get_cpu_count(only_physical_cores=True)
589
589
  num_gpus = 0
590
590
  return num_cpus, num_gpus
591
591
 
@@ -642,7 +642,7 @@ class NNFastAiTabularModel(AbstractModel):
642
642
 
643
643
  def _get_maximum_resources(self) -> dict[str, Union[int, float]]:
644
644
  # fastai model trains slower when utilizing virtual cores and this issue scale up when the number of cpu cores increases
645
- return {"num_cpus": ResourceManager.get_cpu_count_psutil(logical=False)}
645
+ return {"num_cpus": ResourceManager.get_cpu_count(only_physical_cores=True)}
646
646
 
647
647
  def get_minimum_resources(self, is_gpu_available=False):
648
648
  minimum_resources = {
@@ -532,8 +532,8 @@ class LGBModel(AbstractModel):
532
532
  return minimum_resources
533
533
 
534
534
  def _get_default_resources(self):
535
- # logical=False is faster in training
536
- num_cpus = ResourceManager.get_cpu_count_psutil(logical=False)
535
+ # only_physical_cores=True is faster in training
536
+ num_cpus = ResourceManager.get_cpu_count(only_physical_cores=True)
537
537
  num_gpus = 0
538
538
  return num_cpus, num_gpus
539
539
 
@@ -0,0 +1,347 @@
1
+ """
2
+ Code Adapted from TabArena: https://github.com/autogluon/tabrepo/blob/main/tabrepo/benchmark/models/ag/realmlp/realmlp_model.py
3
+
4
+ Model: RealMLP
5
+ Paper: Better by Default: Strong Pre-Tuned MLPs and Boosted Trees on Tabular Data
6
+ Authors: David Holzmüller, Léo Grinsztajn, Ingo Steinwart
7
+ Codebase: https://github.com/dholzmueller/pytabkit
8
+ License: Apache-2.0
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ import math
15
+ import time
16
+ from contextlib import contextmanager
17
+ from typing import Literal
18
+
19
+ import numpy as np
20
+ import pandas as pd
21
+ from sklearn.impute import SimpleImputer
22
+
23
+ from autogluon.common.utils.pandas_utils import get_approximate_df_mem_usage
24
+ from autogluon.common.utils.resource_utils import ResourceManager
25
+ from autogluon.core.models import AbstractModel
26
+ from autogluon.tabular import __version__
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
@contextmanager
def set_logger_level(logger_name: str, level: int):
    """Temporarily override the level of a named logger.

    The logger obtained via ``logging.getLogger(logger_name)`` is set to
    ``level`` while the ``with`` body runs; the level it had on entry is put
    back when the body exits, whether it finishes normally or raises.

    Parameters
    ----------
    logger_name : str
        Name passed to ``logging.getLogger``.
    level : int
        Logging level to apply inside the ``with`` block.
    """
    target_logger = logging.getLogger(logger_name)
    # Remember the explicit level so it can be restored on exit.
    previous_level = target_logger.level
    target_logger.setLevel(level)
    try:
        yield
    finally:
        target_logger.setLevel(previous_level)
40
+
41
+
42
+ # pip install pytabkit
43
class RealMLPModel(AbstractModel):
    """AutoGluon model wrapper around the pytabkit RealMLP estimators.

    The wrapper picks a pytabkit classifier or regressor class based on
    ``problem_type``, mean-imputes numeric features (adding missing-indicator
    columns), optionally casts boolean features to ``category`` dtype, and
    delegates training to the pytabkit estimator's ``fit``.

    Requires the optional ``pytabkit`` and ``torch`` packages at fit time.
    """

    ag_key = "REALMLP"
    ag_name = "RealMLP"
    ag_priority = 75

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Preprocessing state, populated by `_preprocess(..., is_train=True)`.
        self._imputer = None
        self._features_to_impute = None
        self._features_to_keep = None
        self._indicator_columns = None
        self._features_bool = None
        self._bool_to_cat = None

    def get_model_cls(self, default_hyperparameters: Literal["td", "td_s"] = "td"):
        """Return the pytabkit estimator class for this problem type.

        Parameters
        ----------
        default_hyperparameters : {"td", "td_s"}
            Which pytabkit preset family to use: the ``RealMLP_TD_*`` classes
            for ``"td"``, the ``RealMLP_TD_S_*`` classes for ``"td_s"``.

        Returns
        -------
        The classifier class when ``problem_type`` is ``"binary"`` or
        ``"multiclass"``, otherwise the regressor class.

        Raises
        ------
        AssertionError
            If ``default_hyperparameters`` is not one of the supported values.
        """
        from pytabkit import RealMLP_TD_Classifier, RealMLP_TD_Regressor, RealMLP_TD_S_Classifier, RealMLP_TD_S_Regressor

        # Explicit raise (instead of `assert`) so the check survives `python -O`.
        if default_hyperparameters not in ("td", "td_s"):
            raise AssertionError(
                f"default_hyperparameters must be one of ['td', 'td_s'], got {default_hyperparameters!r}"
            )

        use_td = default_hyperparameters == "td"
        if self.problem_type in ["binary", "multiclass"]:
            return RealMLP_TD_Classifier if use_td else RealMLP_TD_S_Classifier
        return RealMLP_TD_Regressor if use_td else RealMLP_TD_S_Regressor

    def _fit(
        self,
        X: pd.DataFrame,
        y: pd.Series,
        X_val: pd.DataFrame = None,
        y_val: pd.Series = None,
        time_limit: float = None,
        num_cpus: int = 1,
        num_gpus: float = 0,
        verbosity: int = 2,
        **kwargs,
    ):
        """Build the pytabkit estimator from the model hyperparameters and fit it.

        Parameters
        ----------
        X, y
            Training features and labels.
        X_val, y_val
            Optional validation data. When ``X_val`` is None, early stopping is
            disabled and ``val_fraction`` is set to 0.
        time_limit
            Optional time budget in seconds. The time already spent in this
            method is subtracted before it is handed to the estimator.
        num_cpus
            Passed to the estimator as ``n_threads``.
        num_gpus
            ``0`` selects the ``"cpu"`` device, anything else ``"cuda:0"``.
        verbosity
            Controls the level applied to the ``lightning.pytorch`` logger
            while fitting.

        Raises
        ------
        ImportError
            If ``pytabkit`` or ``torch`` cannot be imported.
        AssertionError
            If a GPU was requested but CUDA is unavailable.
        """
        start_time = time.time()

        try:
            import pytabkit  # noqa: F401  (imported only to verify availability)
            import torch
        except ImportError:
            logger.log(
                40,
                f"\tFailed to import pytabkit/torch! To use the RealMLP model, "
                f"do: `pip install autogluon.tabular[realmlp]=={__version__}`.",
            )
            raise

        if verbosity == 0:
            _lightning_log_level = logging.ERROR
        elif verbosity <= 2:
            _lightning_log_level = logging.WARNING
        else:
            _lightning_log_level = logging.INFO

        # FIXME: code assume we only see one GPU in the fit process.
        device = "cpu" if num_gpus == 0 else "cuda:0"
        if (device == "cuda:0") and (not torch.cuda.is_available()):
            raise AssertionError(
                "Fit specified to use GPU, but CUDA is not available on this machine. "
                "Please switch to CPU usage instead.",
            )

        hyp = self._get_model_params()

        default_hyperparameters = hyp.pop("default_hyperparameters", "td")

        model_cls = self.get_model_cls(default_hyperparameters=default_hyperparameters)

        # Maps the stopping metric's name to the validation metric name handed to pytabkit.
        metric_map = {
            "roc_auc": "1-auc_ovr_alt",
            "accuracy": "class_error",
            "balanced_accuracy": "1-balanced_accuracy",
            "log_loss": "cross_entropy",
            "rmse": "rmse",
            "root_mean_squared_error": "rmse",
            "r2": "rmse",
            "mae": "mae",
            # NOTE(review): "mean_average_error" looks like a typo for "mean_absolute_error";
            # the correct spelling is added and the legacy key kept for backward compatibility.
            # Presumably the MAE scorer reports "mean_absolute_error" as its name — TODO confirm.
            "mean_absolute_error": "mae",
            "mean_average_error": "mae",
        }

        val_metric_name = metric_map.get(self.stopping_metric.name, None)

        init_kwargs = dict()

        if val_metric_name is not None:
            init_kwargs["val_metric_name"] = val_metric_name

        # TODO: Make this smarter? Maybe use `eval_metric.needs_pred`
        if hyp.get("use_ls", None) == "auto":
            # Resolve "auto": False when the metric is unmapped or is one of
            # cross_entropy / 1-auc_ovr_alt, otherwise None.
            if val_metric_name is None or val_metric_name in ["cross_entropy", "1-auc_ovr_alt"]:
                hyp["use_ls"] = False
            else:
                hyp["use_ls"] = None

        if X_val is None:
            hyp["use_early_stopping"] = False
            hyp["val_fraction"] = 0

        # These keys are consumed by this wrapper and must not reach the pytabkit constructor.
        bool_to_cat = hyp.pop("bool_to_cat", True)
        impute_bool = hyp.pop("impute_bool", True)
        name_categories = hyp.pop("name_categories", True)

        n_features = len(X.columns)
        if hyp.get("predict_batch_size", None) == "auto":
            # simple heuristic to avoid OOM during inference time
            # note: this isn't fool-proof, and ignores the actual memory availability of the machine.
            # note: this is based on an assumption of 32 GB of memory available on the instance
            # default is 1024
            # `max(n_features, 1)` guards against a division by zero on a frame without columns.
            hyp["predict_batch_size"] = max(min(int(8192 * 200 / max(n_features, 1)), 8192), 64)

        self.model = model_cls(
            n_threads=num_cpus,
            device=device,
            **init_kwargs,
            **hyp,
        )

        X = self.preprocess(X, is_train=True, bool_to_cat=bool_to_cat, impute_bool=impute_bool)

        # FIXME: In rare cases can cause exceptions if name_categories=False, unknown why
        extra_fit_kwargs = {}
        if name_categories:
            cat_col_names = X.select_dtypes(include="category").columns.tolist()
            extra_fit_kwargs["cat_col_names"] = cat_col_names

        if X_val is not None:
            X_val = self.preprocess(X_val)

        # Remaining budget after the setup work above; None means "no limit".
        time_to_fit = time_limit - (time.time() - start_time) if time_limit is not None else None

        with set_logger_level("lightning.pytorch", _lightning_log_level):
            self.model = self.model.fit(
                X=X,
                y=y,
                X_val=X_val,
                y_val=y_val,
                time_to_fit_in_seconds=time_to_fit,
                **extra_fit_kwargs,
            )

    def _predict_proba(self, X, **kwargs) -> np.ndarray:
        """Run the parent prediction logic with the ``lightning.pytorch`` logger at WARNING."""
        with set_logger_level("lightning.pytorch", logging.WARNING):
            # Forward the keyword arguments themselves; the previous `kwargs=kwargs`
            # packed them into a single argument named `kwargs`, so they were not forwarded.
            return super()._predict_proba(X=X, **kwargs)

    # TODO: Move missing indicator + mean fill to a generic preprocess flag available to all models
    # FIXME: bool_to_cat is a hack: Maybe move to abstract model?
    def _preprocess(self, X: pd.DataFrame, is_train: bool = False, bool_to_cat: bool = False, impute_bool: bool = True, **kwargs) -> pd.DataFrame:
        """
        Imputes missing values via the mean and adds indicator columns for numerical features.
        Converts indicator columns to categorical features to avoid them being treated as numerical by RealMLP.

        Parameters
        ----------
        X : pd.DataFrame
            Data to transform; a deep copy is modified, not the frame returned by the parent.
        is_train : bool
            When True, the feature lists and the imputer are (re)computed from ``X``
            and stored on the instance; otherwise the stored state is reused.
        bool_to_cat : bool
            Only read when ``is_train`` is True. If set, boolean features are cast to ``category``.
        impute_bool : bool
            Only read when ``is_train`` is True. If False, features with the ``bool``
            special type are excluded from imputation.
        """
        X = super()._preprocess(X, **kwargs)

        # FIXME: is copy needed?
        X = X.copy(deep=True)
        if is_train:
            self._bool_to_cat = bool_to_cat
            self._features_bool = self._feature_metadata.get_features(required_special_types=["bool"])
            if impute_bool:  # Technically this should do nothing useful because bools will never have NaN
                self._features_to_impute = self._feature_metadata.get_features(valid_raw_types=["int", "float"])
                self._features_to_keep = self._feature_metadata.get_features(invalid_raw_types=["int", "float"])
            else:
                self._features_to_impute = self._feature_metadata.get_features(valid_raw_types=["int", "float"], invalid_special_types=["bool"])
                self._features_to_keep = [f for f in self._feature_metadata.get_features() if f not in self._features_to_impute]
            if self._features_to_impute:
                self._imputer = SimpleImputer(strategy="mean", add_indicator=True)
                self._imputer.fit(X=X[self._features_to_impute])
                # Output names that are not input features are the added missing-indicator columns.
                self._indicator_columns = [c for c in self._imputer.get_feature_names_out() if c not in self._features_to_impute]
        if self._imputer is not None:
            X_impute = self._imputer.transform(X=X[self._features_to_impute])
            X_impute = pd.DataFrame(X_impute, index=X.index, columns=self._imputer.get_feature_names_out())
            if self._indicator_columns:
                # FIXME: Use CategoryFeatureGenerator? Or tell the model which is category
                # TODO: Add to features_bool?
                X_impute[self._indicator_columns] = X_impute[self._indicator_columns].astype("category")
            X = pd.concat([X[self._features_to_keep], X_impute], axis=1)
        if self._bool_to_cat and self._features_bool:
            # FIXME: Use CategoryFeatureGenerator? Or tell the model which is category
            X[self._features_bool] = X[self._features_bool].astype("category")
        return X

    def _set_default_params(self):
        """Register the default hyperparameters for any value the user did not set."""
        default_params = dict(
            random_state=0,

            # Don't use early stopping by default, seems to work well without
            use_early_stopping=False,
            early_stopping_additive_patience=40,
            early_stopping_multiplicative_patience=3,

            # verdict: use_ls="auto" is much better than None.
            use_ls="auto",

            # verdict: no impact, but makes more sense to be False.
            impute_bool=False,

            # verdict: name_categories=True avoids random exceptions being raised in rare cases
            name_categories=True,

            # verdict: bool_to_cat=True is equivalent to False in terms of quality, but can be slightly faster in training time
            #  and slightly slower in inference time
            bool_to_cat=True,

            # verdict: "td" is better than "td_s"
            default_hyperparameters="td",  # options ["td", "td_s"]

            predict_batch_size="auto",  # if auto, uses AutoGluon's heuristic to set a value between 8192 and 64.
        )
        for param, val in default_params.items():
            self._set_default_param_value(param, val)

    @classmethod
    def supported_problem_types(cls) -> list[str] | None:
        """Return the problem types this model can be fit on."""
        return ["binary", "multiclass", "regression"]

    def _get_default_stopping_metric(self):
        """Use the evaluation metric as the stopping metric."""
        return self.eval_metric

    def _get_default_resources(self) -> tuple[int, int]:
        """Return the default ``(num_cpus, num_gpus)``: physical core count and at most one GPU."""
        # only_physical_cores=True is faster in training
        num_cpus = ResourceManager.get_cpu_count(only_physical_cores=True)
        num_gpus = min(ResourceManager.get_gpu_count_torch(), 1)
        return num_cpus, num_gpus

    def _estimate_memory_usage(self, X: pd.DataFrame, **kwargs) -> int:
        """Estimate the memory needed to fit on ``X`` using this model's hyperparameters."""
        hyperparameters = self._get_model_params()
        return self.estimate_memory_usage_static(X=X, problem_type=self.problem_type, num_classes=self.num_classes, hyperparameters=hyperparameters, **kwargs)

    @classmethod
    def _estimate_memory_usage_static(
        cls,
        *,
        X: pd.DataFrame,
        hyperparameters: dict = None,
        **kwargs,
    ) -> int:
        """
        Heuristic memory estimate that correlates strongly with RealMLP's more sophisticated method

        The estimate is the sum of three terms: five times the approximate DataFrame
        memory usage, a per-column term scaled by ``plr_hidden_1``, ``plr_hidden_2`` and
        ``hidden_width``, and a fixed 300 MB overhead. Presumably expressed in bytes — TODO confirm.

        More comprehensive memory estimate logic:

        ```python
        import copy
        from typing import Any

        from pytabkit.models.alg_interfaces.nn_interfaces import NNAlgInterface
        from pytabkit.models.data.data import DictDataset, TensorInfo
        from pytabkit.models.sklearn.default_params import DefaultParams

        def estimate_realmlp_cpu_ram_gb(hparams: dict[str, Any], n_numerical: int, cat_sizes: list[int], n_classes: int,
                                        n_samples: int):
            params = copy.copy(DefaultParams.RealMLP_TD_CLASS if n_classes > 0 else DefaultParams.RealMLP_TD_REG)
            params.update(hparams)

            ds = DictDataset(tensors=None, tensor_infos=dict(x_cont=TensorInfo(feat_shape=[n_numerical]),
                                                             x_cat=TensorInfo(cat_sizes=cat_sizes),
                                                             y=TensorInfo(cat_sizes=[n_classes])), device='cpu',
                             n_samples=n_samples)

            alg_interface = NNAlgInterface(**params)
            res = alg_interface.get_required_resources(ds, n_cv=1, n_refit=0, n_splits=1, split_seeds=[0], n_train=n_samples)
            return res.cpu_ram_gb
        ```

        """
        if hyperparameters is None:
            hyperparameters = {}
        plr_hidden_1 = hyperparameters.get("plr_hidden_1", 16)
        plr_hidden_2 = hyperparameters.get("plr_hidden_2", 4)
        hidden_width = hyperparameters.get("hidden_width", 256)

        num_features = len(X.columns)
        columns_mem_est = num_features * 8e5

        hidden_1_weight = 0.13
        hidden_2_weight = 0.42
        width_factor = math.sqrt(hidden_width / 256 + 0.6)

        columns_mem_est_hidden_1 = columns_mem_est * hidden_1_weight * plr_hidden_1 / 16 * width_factor
        columns_mem_est_hidden_2 = columns_mem_est * hidden_2_weight * plr_hidden_2 / 16 * width_factor
        columns_mem_est = columns_mem_est_hidden_1 + columns_mem_est_hidden_2

        dataset_size_mem_est = 5 * get_approximate_df_mem_usage(X).sum()  # roughly 5x DataFrame memory size
        baseline_overhead_mem_est = 3e8  # 300 MB generic overhead

        mem_estimate = dataset_size_mem_est + columns_mem_est + baseline_overhead_mem_est

        # The arithmetic above yields a float; cast to honour the declared `int` return type.
        return int(mem_estimate)

    @classmethod
    def _class_tags(cls) -> dict:
        """Advertise that the static memory estimate is implemented."""
        return {"can_estimate_memory_usage_static": True}

    def _more_tags(self) -> dict:
        """Return instance tags; refit-full is declared unsupported."""
        # TODO: Need to add train params support, track best epoch
        #  How to mirror RealMLP learning rate scheduler while forcing stopping at a specific epoch?
        tags = {"can_refit_full": False}
        return tags
@@ -309,8 +309,9 @@ class RFModel(AbstractModel):
309
309
  if self.model.n_outputs_ == 1:
310
310
  self.model.n_classes_ = [self.model.n_classes_]
311
311
  from sklearn.tree._tree import DOUBLE, DTYPE
312
+ from sklearn.utils.validation import check_X_y
312
313
 
313
- X, y = self.model._validate_data(X, y, multi_output=True, accept_sparse="csc", dtype=DTYPE)
314
+ X, y = check_X_y(X, y, multi_output=True, accept_sparse="csc", dtype=DTYPE)
314
315
  if y.ndim == 1:
315
316
  # reshape is necessary to preserve the data contiguity against vs
316
317
  # [:, np.newaxis] that does not.