autogluon.tabular 1.5.1b20260105__py3-none-any.whl → 1.5.1b20260116__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in a supported public registry. It is provided for informational purposes only. A quick way to check which of the two versions is installed locally is sketched after the file list below.

Potentially problematic release: this version of autogluon.tabular has been flagged as potentially problematic.

Files changed (135)
  1. autogluon/tabular/__init__.py +1 -0
  2. autogluon/tabular/configs/config_helper.py +18 -6
  3. autogluon/tabular/configs/feature_generator_presets.py +3 -1
  4. autogluon/tabular/configs/hyperparameter_configs.py +42 -9
  5. autogluon/tabular/configs/presets_configs.py +38 -14
  6. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py +84 -14
  7. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2025.py +48 -48
  8. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_cpu_2025_12_18.py +774 -1
  9. autogluon/tabular/configs/zeroshot/zeroshot_portfolio_gpu_2025_12_18.py +421 -1
  10. autogluon/tabular/experimental/_scikit_mixin.py +6 -2
  11. autogluon/tabular/experimental/_tabular_classifier.py +3 -1
  12. autogluon/tabular/experimental/_tabular_regressor.py +3 -1
  13. autogluon/tabular/experimental/plot_leaderboard.py +73 -19
  14. autogluon/tabular/learner/abstract_learner.py +160 -42
  15. autogluon/tabular/learner/default_learner.py +78 -22
  16. autogluon/tabular/models/__init__.py +2 -2
  17. autogluon/tabular/models/_utils/rapids_utils.py +3 -1
  18. autogluon/tabular/models/abstract/abstract_torch_model.py +2 -0
  19. autogluon/tabular/models/automm/automm_model.py +12 -3
  20. autogluon/tabular/models/automm/ft_transformer.py +5 -1
  21. autogluon/tabular/models/catboost/callbacks.py +2 -2
  22. autogluon/tabular/models/catboost/catboost_model.py +93 -29
  23. autogluon/tabular/models/catboost/catboost_softclass_utils.py +4 -1
  24. autogluon/tabular/models/catboost/catboost_utils.py +3 -1
  25. autogluon/tabular/models/ebm/ebm_model.py +8 -13
  26. autogluon/tabular/models/ebm/hyperparameters/parameters.py +1 -0
  27. autogluon/tabular/models/ebm/hyperparameters/searchspaces.py +1 -0
  28. autogluon/tabular/models/fastainn/callbacks.py +20 -3
  29. autogluon/tabular/models/fastainn/hyperparameters/searchspaces.py +11 -1
  30. autogluon/tabular/models/fastainn/quantile_helpers.py +10 -2
  31. autogluon/tabular/models/fastainn/tabular_nn_fastai.py +65 -18
  32. autogluon/tabular/models/fasttext/fasttext_model.py +3 -1
  33. autogluon/tabular/models/image_prediction/image_predictor.py +7 -2
  34. autogluon/tabular/models/knn/knn_model.py +41 -8
  35. autogluon/tabular/models/lgb/callbacks.py +32 -9
  36. autogluon/tabular/models/lgb/hyperparameters/searchspaces.py +3 -1
  37. autogluon/tabular/models/lgb/lgb_model.py +150 -34
  38. autogluon/tabular/models/lgb/lgb_utils.py +12 -4
  39. autogluon/tabular/models/lr/hyperparameters/searchspaces.py +5 -1
  40. autogluon/tabular/models/lr/lr_model.py +40 -10
  41. autogluon/tabular/models/lr/lr_rapids_model.py +22 -13
  42. autogluon/tabular/models/mitra/_internal/__init__.py +1 -1
  43. autogluon/tabular/models/mitra/_internal/config/__init__.py +1 -1
  44. autogluon/tabular/models/mitra/_internal/config/config_pretrain.py +36 -40
  45. autogluon/tabular/models/mitra/_internal/config/config_run.py +2 -14
  46. autogluon/tabular/models/mitra/_internal/config/enums.py +27 -26
  47. autogluon/tabular/models/mitra/_internal/core/__init__.py +1 -1
  48. autogluon/tabular/models/mitra/_internal/core/callbacks.py +14 -21
  49. autogluon/tabular/models/mitra/_internal/core/get_loss.py +10 -12
  50. autogluon/tabular/models/mitra/_internal/core/get_optimizer.py +17 -32
  51. autogluon/tabular/models/mitra/_internal/core/get_scheduler.py +12 -27
  52. autogluon/tabular/models/mitra/_internal/core/prediction_metrics.py +16 -21
  53. autogluon/tabular/models/mitra/_internal/core/trainer_finetune.py +130 -111
  54. autogluon/tabular/models/mitra/_internal/data/__init__.py +1 -1
  55. autogluon/tabular/models/mitra/_internal/data/collator.py +30 -26
  56. autogluon/tabular/models/mitra/_internal/data/dataset_finetune.py +18 -26
  57. autogluon/tabular/models/mitra/_internal/data/dataset_split.py +10 -7
  58. autogluon/tabular/models/mitra/_internal/data/preprocessor.py +70 -100
  59. autogluon/tabular/models/mitra/_internal/models/__init__.py +1 -1
  60. autogluon/tabular/models/mitra/_internal/models/base.py +7 -10
  61. autogluon/tabular/models/mitra/_internal/models/embedding.py +46 -56
  62. autogluon/tabular/models/mitra/_internal/models/tab2d.py +140 -120
  63. autogluon/tabular/models/mitra/_internal/utils/__init__.py +1 -1
  64. autogluon/tabular/models/mitra/_internal/utils/set_seed.py +3 -1
  65. autogluon/tabular/models/mitra/mitra_model.py +16 -11
  66. autogluon/tabular/models/mitra/sklearn_interface.py +178 -162
  67. autogluon/tabular/models/realmlp/realmlp_model.py +28 -15
  68. autogluon/tabular/models/rf/compilers/onnx.py +1 -1
  69. autogluon/tabular/models/rf/rf_model.py +45 -12
  70. autogluon/tabular/models/rf/rf_quantile.py +4 -2
  71. autogluon/tabular/models/tabdpt/tabdpt_model.py +8 -17
  72. autogluon/tabular/models/tabicl/tabicl_model.py +8 -1
  73. autogluon/tabular/models/tabm/_tabm_internal.py +6 -4
  74. autogluon/tabular/models/tabm/rtdl_num_embeddings.py +80 -127
  75. autogluon/tabular/models/tabm/tabm_model.py +8 -4
  76. autogluon/tabular/models/tabm/tabm_reference.py +53 -85
  77. autogluon/tabular/models/tabpfnmix/_internal/core/callbacks.py +7 -16
  78. autogluon/tabular/models/tabpfnmix/_internal/core/collator.py +16 -24
  79. autogluon/tabular/models/tabpfnmix/_internal/core/dataset_split.py +5 -7
  80. autogluon/tabular/models/tabpfnmix/_internal/core/enums.py +0 -2
  81. autogluon/tabular/models/tabpfnmix/_internal/core/get_loss.py +0 -1
  82. autogluon/tabular/models/tabpfnmix/_internal/core/get_optimizer.py +7 -18
  83. autogluon/tabular/models/tabpfnmix/_internal/core/get_scheduler.py +3 -14
  84. autogluon/tabular/models/tabpfnmix/_internal/core/trainer_finetune.py +79 -64
  85. autogluon/tabular/models/tabpfnmix/_internal/core/y_transformer.py +3 -5
  86. autogluon/tabular/models/tabpfnmix/_internal/data/dataset_finetune.py +17 -30
  87. autogluon/tabular/models/tabpfnmix/_internal/data/preprocessor.py +15 -35
  88. autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py +21 -38
  89. autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py +33 -51
  90. autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py +4 -4
  91. autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py +32 -12
  92. autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py +32 -13
  93. autogluon/tabular/models/tabpfnmix/tabpfnmix_model.py +55 -19
  94. autogluon/tabular/models/tabpfnv2/tabpfnv2_5_model.py +21 -48
  95. autogluon/tabular/models/tabprep/prep_mixin.py +34 -26
  96. autogluon/tabular/models/tabular_nn/compilers/onnx.py +36 -8
  97. autogluon/tabular/models/tabular_nn/torch/tabular_nn_torch.py +130 -36
  98. autogluon/tabular/models/tabular_nn/torch/tabular_torch_dataset.py +8 -4
  99. autogluon/tabular/models/tabular_nn/torch/torch_network_modules.py +26 -5
  100. autogluon/tabular/models/tabular_nn/utils/categorical_encoders.py +41 -24
  101. autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py +33 -8
  102. autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py +21 -6
  103. autogluon/tabular/models/xgboost/callbacks.py +9 -3
  104. autogluon/tabular/models/xgboost/xgboost_model.py +59 -11
  105. autogluon/tabular/models/xt/xt_model.py +1 -0
  106. autogluon/tabular/predictor/interpretable_predictor.py +3 -1
  107. autogluon/tabular/predictor/predictor.py +409 -128
  108. autogluon/tabular/registry/__init__.py +1 -1
  109. autogluon/tabular/registry/_ag_model_registry.py +4 -5
  110. autogluon/tabular/registry/_model_registry.py +1 -0
  111. autogluon/tabular/testing/fit_helper.py +55 -15
  112. autogluon/tabular/testing/generate_datasets.py +1 -1
  113. autogluon/tabular/testing/model_fit_helper.py +10 -4
  114. autogluon/tabular/trainer/abstract_trainer.py +644 -230
  115. autogluon/tabular/trainer/auto_trainer.py +19 -8
  116. autogluon/tabular/trainer/model_presets/presets.py +33 -9
  117. autogluon/tabular/trainer/model_presets/presets_distill.py +16 -2
  118. autogluon/tabular/version.py +1 -1
  119. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/METADATA +26 -26
  120. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/RECORD +127 -135
  121. autogluon/tabular/models/tabpfnv2/rfpfn/__init__.py +0 -20
  122. autogluon/tabular/models/tabpfnv2/rfpfn/configs.py +0 -40
  123. autogluon/tabular/models/tabpfnv2/rfpfn/scoring_utils.py +0 -201
  124. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_decision_tree_tabpfn.py +0 -1464
  125. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_based_random_forest_tabpfn.py +0 -747
  126. autogluon/tabular/models/tabpfnv2/rfpfn/sklearn_compat.py +0 -863
  127. autogluon/tabular/models/tabpfnv2/rfpfn/utils.py +0 -106
  128. autogluon/tabular/models/tabpfnv2/tabpfnv2_model.py +0 -466
  129. /autogluon.tabular-1.5.1b20260105-py3.11-nspkg.pth → /autogluon.tabular-1.5.1b20260116-py3.11-nspkg.pth +0 -0
  130. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/WHEEL +0 -0
  131. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/LICENSE +0 -0
  132. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/licenses/NOTICE +0 -0
  133. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/namespace_packages.txt +0 -0
  134. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/top_level.txt +0 -0
  135. {autogluon_tabular-1.5.1b20260105.dist-info → autogluon_tabular-1.5.1b20260116.dist-info}/zip-safe +0 -0
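The version bump itself appears in autogluon/tabular/version.py and the dist-info METADATA; the remaining entries are code, config, and portfolio changes. As a quick sanity check of which side of this diff an environment actually has installed, here is a minimal sketch (an illustration, not part of the diff; it assumes the wheel is installed in the current environment):

from importlib.metadata import version

# Prints the installed dist version, e.g. "1.5.1b20260105" or "1.5.1b20260116".
print(version("autogluon.tabular"))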
autogluon/tabular/models/tabpfnmix/_internal/models/foundation/embedding.py

@@ -4,13 +4,11 @@ import torch.nn as nn
 
 
 class FoundationEmbeddingX(torch.nn.Module):
-
     def __init__(
-        self,
-        dim: int,
-        n_features: int,
-    ) -> None:
-
+        self,
+        dim: int,
+        n_features: int,
+    ) -> None:
         super().__init__()
 
         self.dim = dim
@@ -18,9 +16,7 @@ class FoundationEmbeddingX(torch.nn.Module):
 
         self.x_embedding = nn.Linear(n_features, dim)
 
-
     def forward(self, x_support: torch.Tensor, x_query__: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
-
         batch_size = x_support.shape[0]
         n_obs_support = x_support.shape[1]
         n_obs_query__ = x_query__.shape[1]
@@ -32,53 +28,44 @@ class FoundationEmbeddingX(torch.nn.Module):
 
 
 class FoundationEmbeddingYFloat(torch.nn.Module):
-
     def __init__(
-        self,
-        dim: int,
-    ) -> None:
-
+        self,
+        dim: int,
+    ) -> None:
         super().__init__()
 
         self.dim = dim
 
         self.y_embedding = nn.Linear(1, dim)
 
-
     def forward(self, y_support: torch.Tensor, n_obs_query: int) -> tuple[torch.Tensor, torch.Tensor]:
-
         batch_size = y_support.shape[0]
 
         y_support = y_support.type(torch.float32)
-        y_support = einops.rearrange(y_support, 'b n -> b n 1')
+        y_support = einops.rearrange(y_support, "b n -> b n 1")
 
         y_support = self.y_embedding(y_support)
         y_query = torch.zeros((batch_size, n_obs_query, self.dim), device=y_support.device, dtype=torch.float32)
 
         return y_support, y_query
-
 
 
 class FoundationEmbeddingYInteger(torch.nn.Module):
-
     def __init__(
-        self,
-        n_classes: int,
-        dim: int,
-    ) -> None:
-
+        self,
+        n_classes: int,
+        dim: int,
+    ) -> None:
         super().__init__()
 
         self.n_classes = n_classes
         self.dim = dim
 
         self.y_embedding = nn.Embedding(n_classes, dim)
-        self.y_padding = nn.Embedding(1, dim, padding_idx=0) # padding is modeled as a separate class
-        self.y_mask = nn.Embedding(1, dim) # masking is also modeled as a separate class
-
+        self.y_padding = nn.Embedding(1, dim, padding_idx=0)  # padding is modeled as a separate class
+        self.y_mask = nn.Embedding(1, dim)  # masking is also modeled as a separate class
 
     def forward(self, y_support: torch.Tensor, n_obs_query: int) -> tuple[torch.Tensor, torch.Tensor]:
-
         batch_size = y_support.shape[0]
         n_obs_support = y_support.shape[1]
 
@@ -88,33 +75,29 @@ class FoundationEmbeddingYInteger(torch.nn.Module):
         y_support_pad = y_support == -100
 
         y_sup = torch.zeros((batch_size, n_obs_support, self.dim), device=y_support.device, dtype=torch.float32)
-        y_sup[ y_support_pad] = self.y_padding( y_support[ y_support_pad] + 100 )
-        y_sup[~y_support_pad] = self.y_embedding( y_support[~y_support_pad] )
+        y_sup[y_support_pad] = self.y_padding(y_support[y_support_pad] + 100)
+        y_sup[~y_support_pad] = self.y_embedding(y_support[~y_support_pad])
 
         y_query = torch.zeros((batch_size, n_obs_query), device=y_support.device, dtype=torch.int64)
         y_query = self.y_mask(y_query)
 
         return y_sup, y_query
-
 
-class FoundationObservationEmbedding(torch.nn.Module):
 
+class FoundationObservationEmbedding(torch.nn.Module):
     def __init__(self, dim: int) -> None:
-
         super().__init__()
 
         self.dim = dim
         self.max_dim = 2**16
         self.embedding = nn.Embedding(self.max_dim, dim)
 
-
     def forward(self, batch_size: int, n_obs: int) -> torch.Tensor:
+        assert n_obs <= self.max_dim, f"Number of observations is too large. Max is {self.max_dim}, got {n_obs}"
 
-        assert n_obs <= self.max_dim, f'Number of observations is too large. Max is {self.max_dim}, got {n_obs}'
-
-        # Take a random embedding from the pool of embeddings
+        # Take a random embedding from the pool of embeddings
         weights = torch.ones((batch_size, self.max_dim), dtype=torch.float32, device=self.embedding.weight.device)
         indices = torch.multinomial(weights, num_samples=n_obs, replacement=False)
         x = self.embedding(indices)
-
-        return x
+
+        return x
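The hunks above are mostly formatting (double quotes, removed blank lines) plus moving the n_obs assert into FoundationObservationEmbedding.forward. The core of that forward, kept as context lines, is a random-subset lookup: uniform weights fed to torch.multinomial without replacement pick n_obs distinct rows of a 2**16-entry embedding table per batch element. A standalone sketch with toy sizes (illustration only, not code from the package):

import torch
import torch.nn as nn

# Hypothetical sizes for illustration only.
batch_size, n_obs, dim, max_dim = 2, 5, 8, 2**16

embedding = nn.Embedding(max_dim, dim)

# Uniform weights -> multinomial without replacement yields n_obs distinct
# indices per batch element, i.e. a random subset of the embedding pool.
weights = torch.ones((batch_size, max_dim), dtype=torch.float32)
indices = torch.multinomial(weights, num_samples=n_obs, replacement=False)
x = embedding(indices)
print(x.shape)  # torch.Size([2, 5, 8])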
autogluon/tabular/models/tabpfnmix/_internal/models/foundation/foundation_transformer.py

@@ -1,16 +1,14 @@
-
 import einops
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from ...core.enums import Task
+from huggingface_hub import PyTorchModelHubMixin
 
+from ...core.enums import Task
 from .embedding import FoundationEmbeddingX, FoundationEmbeddingYFloat, FoundationEmbeddingYInteger
-from huggingface_hub import PyTorchModelHubMixin
 
 
 class FoundationTransformer(nn.Module, PyTorchModelHubMixin):
-
     def __init__(
         self,
         n_features: int,
@@ -22,7 +20,6 @@ class FoundationTransformer(nn.Module, PyTorchModelHubMixin):
         y_as_float_embedding: bool,
         task: str = Task.CLASSIFICATION,
     ) -> None:
-
         super().__init__()
 
         self.n_features = n_features
@@ -44,36 +41,34 @@ class FoundationTransformer(nn.Module, PyTorchModelHubMixin):
         self.layers = nn.ModuleList([])
 
         for _ in range(n_layers):
-
             att = MultiheadAttention(dim, n_heads)
 
-            self.layers.append(nn.ModuleDict({
-                'layer_norm1': nn.LayerNorm(dim),
-                'attention': att,
-                'layer_norm2': nn.LayerNorm(dim),
-                'linear1': nn.Linear(dim, dim*4),
-                'linear2': nn.Linear(dim*4, dim),
-            }))
-
-        self.final_layer1 = nn.Linear(dim, dim*4)
+            self.layers.append(
+                nn.ModuleDict(
+                    {
+                        "layer_norm1": nn.LayerNorm(dim),
+                        "attention": att,
+                        "layer_norm2": nn.LayerNorm(dim),
+                        "linear1": nn.Linear(dim, dim * 4),
+                        "linear2": nn.Linear(dim * 4, dim),
+                    }
+                )
+            )
+
+        self.final_layer1 = nn.Linear(dim, dim * 4)
         if self.task == Task.CLASSIFICATION:
-            self.final_layer2 = nn.Linear(dim*4, n_classes)
+            self.final_layer2 = nn.Linear(dim * 4, n_classes)
         elif self.task == Task.REGRESSION:
-            self.final_layer2 = nn.Linear(dim*4, 1)
+            self.final_layer2 = nn.Linear(dim * 4, 1)
         self.init_weights()
 
-
     def init_weights(self):
-
         for module_dict in self.layers:
-
             # module_dict['attention'].init_weights()
-            nn.init.zeros_(module_dict['linear2'].weight)
-            nn.init.zeros_(module_dict['linear2'].bias)
-
+            nn.init.zeros_(module_dict["linear2"].weight)
+            nn.init.zeros_(module_dict["linear2"].bias)
 
     def forward(self, x_support: torch.Tensor, y_support: torch.Tensor, x_query: torch.Tensor):
-
         """
         x_support is (batch_size, n_observations_support, n_features)
         y_support is (batch_size, n_observations_support)
@@ -106,38 +101,34 @@ class FoundationTransformer(nn.Module, PyTorchModelHubMixin):
         support = x_support + y_support
         query__ = x_query__ + y_query__
 
-        x, pack = einops.pack((support, query__), 'b * d')
-
-        for module_dict in self.layers:
+        x, pack = einops.pack((support, query__), "b * d")
 
+        for module_dict in self.layers:
             x_residual = x
-            support, query__ = einops.unpack(x, pack, 'b * d')
-            att_support = module_dict['attention'](support, support, support, key_padding_mask=padding_mask)
-            att_query__ = module_dict['attention'](query__, support, support, key_padding_mask=padding_mask)
-            x = einops.pack((att_support, att_query__), 'b * d')[0]
+            support, query__ = einops.unpack(x, pack, "b * d")
+            att_support = module_dict["attention"](support, support, support, key_padding_mask=padding_mask)
+            att_query__ = module_dict["attention"](query__, support, support, key_padding_mask=padding_mask)
+            x = einops.pack((att_support, att_query__), "b * d")[0]
             x = x_residual + x
-            x = module_dict['layer_norm1'](x)
+            x = module_dict["layer_norm1"](x)
             x_residual = x
-            x = module_dict['linear1'](x)
+            x = module_dict["linear1"](x)
             x = torch.nn.functional.gelu(x)
-            x = module_dict['linear2'](x)
+            x = module_dict["linear2"](x)
             x = x_residual + x
-            x = module_dict['layer_norm2'](x)
+            x = module_dict["layer_norm2"](x)
 
         x = self.final_layer1(x)
         x = F.gelu(x)
         x = self.final_layer2(x)
 
-        support, query__ = einops.unpack(x, pack, 'b * c')
+        support, query__ = einops.unpack(x, pack, "b * c")
 
         return query__
 
 
-
 class MultiheadAttention(torch.nn.Module):
-
     def __init__(self, dim: int, n_heads: int) -> None:
-
         super().__init__()
 
         self.use_flash_attention = False
@@ -146,21 +137,14 @@ class MultiheadAttention(torch.nn.Module):
 
         self.att = nn.MultiheadAttention(dim, n_heads, dropout=0.0, batch_first=True)
 
-
-
     def init_weights(self):
         pass
         # nn.init.zeros_(self.att.out_proj.weight)
         # nn.init.zeros_(self.att.out_proj.bias)
 
-
     def forward(
-        self,
-        query: torch.Tensor,
-        key: torch.Tensor,
-        value: torch.Tensor,
-        key_padding_mask: torch.Tensor
-    ) -> torch.Tensor:
+        self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, key_padding_mask: torch.Tensor
+    ) -> torch.Tensor:
         """
         b = batch size
         n = number of samples (dataset size)
@@ -179,9 +163,7 @@ class MultiheadAttention(torch.nn.Module):
         return output
 
 
-
-
 class SwiGLU(nn.Module):
     def forward(self, x):
         x, gate = x.chunk(2, dim=-1)
-        return F.silu(gate) * x
+        return F.silu(gate) * x
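The forward-pass hunk above is also purely cosmetic (quote style, operator spacing), but it documents the transformer's support/query bookkeeping: support and query tokens are packed into one sequence with einops.pack, unpacked inside each block so that support attends to support and query attends to support, then re-packed before the residual add and LayerNorm. A minimal sketch of that pack/attend/re-pack pattern, using torch's stock nn.MultiheadAttention in place of the package's wrapper (toy sizes, illustration only):

import einops
import torch
import torch.nn as nn

batch_size, n_support, n_query, dim = 2, 6, 4, 16  # hypothetical sizes
support = torch.randn(batch_size, n_support, dim)
query = torch.randn(batch_size, n_query, dim)

att = nn.MultiheadAttention(dim, 4, batch_first=True)

# Pack support and query along the sequence axis, remembering the split sizes.
x, pack = einops.pack((support, query), "b * d")

# Split back out, then: support attends to support, query attends to support.
support, query = einops.unpack(x, pack, "b * d")
att_support, _ = att(support, support, support)
att_query, _ = att(query, support, support)

# Re-pack so the residual/LayerNorm stack can treat both parts uniformly.
x = einops.pack((att_support, att_query), "b * d")[0]
print(x.shape)  # torch.Size([2, 10, 16])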
autogluon/tabular/models/tabpfnmix/_internal/results/prediction_metrics.py

@@ -17,21 +17,21 @@ class PredictionMetrics:
 
     @classmethod
     def from_prediction(cls, y_pred: np.ndarray, y_true: np.ndarray, task: Task, metric: Scorer):
-
         loss, score, metrics = compute_metrics(y_pred, y_true, task, metric=metric)
 
         return PredictionMetrics(task=task, loss=loss, score=score, metrics=metrics)
 
 
 def compute_metrics(y_pred: np.ndarray, y_true: np.ndarray, task: Task, metric: Scorer) -> tuple[float, float, dict]:
-
     if task == Task.CLASSIFICATION:
         return compute_classification_metrics(y_pred, y_true, metric=metric)
     else:
         return compute_regression_metrics(y_pred, y_true, metric=metric)
-
 
-def compute_classification_metrics(y_pred: np.ndarray, y_true: np.ndarray, metric: Scorer) -> tuple[float, float, dict]:
+
+def compute_classification_metrics(
+    y_pred: np.ndarray, y_true: np.ndarray, metric: Scorer
+) -> tuple[float, float, dict]:
     # predictions are assumed to be log-probabilities
 
     if metric.needs_pred or metric.needs_class:
autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_classifier.py

@@ -3,8 +3,8 @@ from __future__ import annotations
 from pathlib import Path
 
 import numpy as np
-from sklearn.base import BaseEstimator, ClassifierMixin
 import torch
+from sklearn.base import BaseEstimator, ClassifierMixin
 
 from .core.dataset_split import make_stratified_dataset_split
 from .core.trainer_finetune import TrainerFinetune
@@ -16,32 +16,52 @@ from .models.foundation.foundation_transformer import FoundationTransformer
 # TODO: To mitigate val overfitting, can fit multiple random seeds at same time and pick same epoch for all of them, track average performance on epoch.
 # TODO: Test shuffling the data and see if it makes TabPFNv2 worse, same with TabForestPFN
 class TabPFNMixClassifier(BaseEstimator, ClassifierMixin):
-    def __init__(self, n_classes, cfg, split_val, model_path: str = None, weights_path: str | Path = None, stopping_metric=None, use_best_epoch: bool = True):
+    def __init__(
+        self,
+        n_classes,
+        cfg,
+        split_val,
+        model_path: str = None,
+        weights_path: str | Path = None,
+        stopping_metric=None,
+        use_best_epoch: bool = True,
+    ):
         if weights_path is not None:
             weights_path = str(Path(weights_path))
 
         if model_path is not None:
             model = FoundationTransformer.from_pretrained(model_path)
-            assert model.task == cfg.task, f"The pretrained model '{model_path}' is for task {model.task}, but the problem type is for task {cfg.task}..."
+            assert model.task == cfg.task, (
+                f"The pretrained model '{model_path}' is for task {model.task}, but the problem type is for task {cfg.task}..."
+            )
         else:
             model = FoundationTransformer(
-                n_features=cfg.hyperparams['n_features'],
-                n_classes=cfg.hyperparams['n_classes'],
-                dim=cfg.hyperparams['dim'],
-                n_layers=cfg.hyperparams['n_layers'],
-                n_heads=cfg.hyperparams['n_heads'],
-                attn_dropout=cfg.hyperparams['attn_dropout'],
-                y_as_float_embedding=cfg.hyperparams['y_as_float_embedding'],
+                n_features=cfg.hyperparams["n_features"],
+                n_classes=cfg.hyperparams["n_classes"],
+                dim=cfg.hyperparams["dim"],
+                n_layers=cfg.hyperparams["n_layers"],
+                n_heads=cfg.hyperparams["n_heads"],
+                attn_dropout=cfg.hyperparams["attn_dropout"],
+                y_as_float_embedding=cfg.hyperparams["y_as_float_embedding"],
                 task=cfg.task,
             )
         if weights_path is not None:
             model.load_state_dict(torch.load(weights_path, weights_only=True)) # nosec B614
 
         self.split_val = split_val
-        self.trainer = TrainerFinetune(cfg, model, n_classes=n_classes, stopping_metric=stopping_metric, use_best_epoch=use_best_epoch)
+        self.trainer = TrainerFinetune(
+            cfg, model, n_classes=n_classes, stopping_metric=stopping_metric, use_best_epoch=use_best_epoch
+        )
         super().__init__()
 
-    def fit(self, X: np.ndarray, y: np.ndarray, X_val: np.ndarray = None, y_val: np.ndarray = None, time_limit: float = None):
+    def fit(
+        self,
+        X: np.ndarray,
+        y: np.ndarray,
+        X_val: np.ndarray = None,
+        y_val: np.ndarray = None,
+        time_limit: float = None,
+    ):
         # FIXME: Should X and y be preprocessed for inference efficiency? Yes.
         self.X_ = X # FIXME: Optimize storage of X and y? Is this redundant? Is X and y saving done multiple times during pickle?
         self.y_ = y
autogluon/tabular/models/tabpfnmix/_internal/tabpfnmix_regressor.py

@@ -3,8 +3,8 @@ from __future__ import annotations
 from pathlib import Path
 
 import numpy as np
-from sklearn.base import BaseEstimator, RegressorMixin
 import torch
+from sklearn.base import BaseEstimator, RegressorMixin
 
 from .core.dataset_split import make_stratified_dataset_split
 from .core.trainer_finetune import TrainerFinetune
@@ -16,8 +16,16 @@ from .models.foundation.foundation_transformer import FoundationTransformer
 # TODO: To mitigate val overfitting, can fit multiple random seeds at same time and pick same epoch for all of them, track average performance on epoch.
 # TODO: Test shuffling the data and see if it makes TabPFNv2 worse, same with TabForestPFN
 class TabPFNMixRegressor(BaseEstimator, RegressorMixin):
-    def __init__(self, n_classes, cfg, split_val, model_path: str = None, weights_path: str | Path = None, stopping_metric=None, use_best_epoch: bool = True):
-
+    def __init__(
+        self,
+        n_classes,
+        cfg,
+        split_val,
+        model_path: str = None,
+        weights_path: str | Path = None,
+        stopping_metric=None,
+        use_best_epoch: bool = True,
+    ):
         self.cfg = cfg
 
         if weights_path is not None:
@@ -25,26 +33,37 @@ class TabPFNMixRegressor(BaseEstimator, RegressorMixin):
 
         if model_path is not None:
             model = FoundationTransformer.from_pretrained(model_path)
-            assert model.task == cfg.task, f"The pretrained model '{model_path}' is for task {model.task}, but the problem type is for task {cfg.task}..."
+            assert model.task == cfg.task, (
+                f"The pretrained model '{model_path}' is for task {model.task}, but the problem type is for task {cfg.task}..."
+            )
         else:
             model = FoundationTransformer(
-                n_features=cfg.hyperparams['n_features'],
-                n_classes=cfg.hyperparams['n_classes'],
-                dim=cfg.hyperparams['dim'],
-                n_layers=cfg.hyperparams['n_layers'],
-                n_heads=cfg.hyperparams['n_heads'],
-                attn_dropout=cfg.hyperparams['attn_dropout'],
-                y_as_float_embedding=cfg.hyperparams['y_as_float_embedding'],
+                n_features=cfg.hyperparams["n_features"],
+                n_classes=cfg.hyperparams["n_classes"],
+                dim=cfg.hyperparams["dim"],
+                n_layers=cfg.hyperparams["n_layers"],
+                n_heads=cfg.hyperparams["n_heads"],
+                attn_dropout=cfg.hyperparams["attn_dropout"],
+                y_as_float_embedding=cfg.hyperparams["y_as_float_embedding"],
                 task=cfg.task,
             )
         if weights_path is not None:
             model.load_state_dict(torch.load(weights_path, weights_only=True)) # nosec B614
 
         self.split_val = split_val
-        self.trainer = TrainerFinetune(cfg, model, n_classes=n_classes, stopping_metric=stopping_metric, use_best_epoch=use_best_epoch)
+        self.trainer = TrainerFinetune(
+            cfg, model, n_classes=n_classes, stopping_metric=stopping_metric, use_best_epoch=use_best_epoch
+        )
         super().__init__()
 
-    def fit(self, X: np.ndarray, y: np.ndarray, X_val: np.ndarray = None, y_val: np.ndarray = None, time_limit: float = None):
+    def fit(
+        self,
+        X: np.ndarray,
+        y: np.ndarray,
+        X_val: np.ndarray = None,
+        y_val: np.ndarray = None,
+        time_limit: float = None,
+    ):
         # FIXME: Should X and y be preprocessed for inference efficiency? Yes.
         self.X_ = X # FIXME: Optimize storage of X and y? Is this redundant? Is X and y saving done multiple times during pickle?
         self.y_ = y
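Both TabPFNMix wrappers keep the same two weight-loading paths seen in these hunks: FoundationTransformer.from_pretrained(model_path) via the huggingface_hub mixin, or load_state_dict on torch.load(weights_path, weights_only=True). A self-contained sketch of the second path on a toy module ("weights.pt" is a hypothetical filename, not from the package):

import torch
import torch.nn as nn

# Toy stand-in for a pretrained checkpoint.
model = nn.Linear(4, 2)
torch.save(model.state_dict(), "weights.pt")

# weights_only=True restricts unpickling to tensors and simple containers,
# the usual mitigation for Bandit's B614 warning that the wrappers silence
# with the `# nosec B614` annotation.
reloaded = nn.Linear(4, 2)
reloaded.load_state_dict(torch.load("weights.pt", weights_only=True))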