autogluon.tabular 1.2.1b20250407__py3-none-any.whl → 1.2.1b20250409__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (22)
  1. autogluon/tabular/register/_ag_model_register.py +0 -2
  2. autogluon/tabular/version.py +1 -1
  3. {autogluon.tabular-1.2.1b20250407.dist-info → autogluon.tabular-1.2.1b20250409.dist-info}/METADATA +13 -13
  4. {autogluon.tabular-1.2.1b20250407.dist-info → autogluon.tabular-1.2.1b20250409.dist-info}/RECORD +11 -22
  5. autogluon/tabular/models/tab_transformer/__init__.py +0 -1
  6. autogluon/tabular/models/tab_transformer/hyperparameters/__init__.py +0 -1
  7. autogluon/tabular/models/tab_transformer/hyperparameters/parameters.py +0 -66
  8. autogluon/tabular/models/tab_transformer/hyperparameters/searchspaces.py +0 -17
  9. autogluon/tabular/models/tab_transformer/modified_transformer.py +0 -494
  10. autogluon/tabular/models/tab_transformer/pretexts.py +0 -150
  11. autogluon/tabular/models/tab_transformer/tab_model_base.py +0 -86
  12. autogluon/tabular/models/tab_transformer/tab_transformer.py +0 -183
  13. autogluon/tabular/models/tab_transformer/tab_transformer_encoder.py +0 -668
  14. autogluon/tabular/models/tab_transformer/tab_transformer_model.py +0 -540
  15. autogluon/tabular/models/tab_transformer/utils.py +0 -124
  16. /autogluon.tabular-1.2.1b20250407-py3.9-nspkg.pth → /autogluon.tabular-1.2.1b20250409-py3.9-nspkg.pth +0 -0
  17. {autogluon.tabular-1.2.1b20250407.dist-info → autogluon.tabular-1.2.1b20250409.dist-info}/LICENSE +0 -0
  18. {autogluon.tabular-1.2.1b20250407.dist-info → autogluon.tabular-1.2.1b20250409.dist-info}/NOTICE +0 -0
  19. {autogluon.tabular-1.2.1b20250407.dist-info → autogluon.tabular-1.2.1b20250409.dist-info}/WHEEL +0 -0
  20. {autogluon.tabular-1.2.1b20250407.dist-info → autogluon.tabular-1.2.1b20250409.dist-info}/namespace_packages.txt +0 -0
  21. {autogluon.tabular-1.2.1b20250407.dist-info → autogluon.tabular-1.2.1b20250409.dist-info}/top_level.txt +0 -0
  22. {autogluon.tabular-1.2.1b20250407.dist-info → autogluon.tabular-1.2.1b20250409.dist-info}/zip-safe +0 -0
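For reference, a minimal way to check which of the two nightly builds above is installed locally, using only the Python standard library (the distribution name is taken from the wheel filename above):

    from importlib.metadata import version

    # Prints the installed build string, e.g. 1.2.1b20250407 or 1.2.1b20250409
    print(version("autogluon.tabular"))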
@@ -1,86 +0,0 @@
- import torch.nn as nn
-
-
- class TabNet(nn.Module):
-     def __init__(self, num_class, feature_dim, num_output_layers, device, params):
-         """
-         Internal torch model that uses TabTransformer as an embedding.
-         This is where we are passing through activations and neurons.
-
-         Parameters
-         ----------
-         num_class (int): Number of classes identified.
-         cat_feat_origin_cards (list): List of categorical features
-         """
-         super().__init__()
-         import torch.nn as nn
-
-         from .tab_transformer import TabTransformer
-
-         self.embed = TabTransformer(**params)
-
-         relu = nn.ReLU()
-         in_dim = 2 * feature_dim
-         lin = nn.Linear(in_dim, in_dim, bias=True)
-         lin_out = nn.Linear(in_dim, num_class, bias=True)
-         self.fc = [nn.Sequential(*[relu, lin])] * (num_output_layers - 1) + [nn.Sequential(*[relu, lin_out])]
-
-         # Each individual layer inside needs to be put into the GPU.
-         # Calling "self.model.cuda()" (TabTransformer:get_model()) will not put a python list into GPU.
-         if device.type == "cuda":
-             for layer in range(num_output_layers):
-                 self.fc[layer] = self.fc[layer].cuda()
-
-     def forward(self, data):
-         features = self.embed(data)
-         out = features.mean(dim=1)
-         for layer in range(len(self.fc)):
-             out = self.fc[layer](out)
-         return out, features
-
-
- class TabModelBase(nn.Module):
-     def __init__(self, n_cont_features, norm_class_name, cat_feat_origin_cards, max_emb_dim, p_dropout, one_hot_embeddings, drop_whole_embeddings):
-         super().__init__()
-         """
-         Base class for all TabTransformer models
-
-         Parameters
-         ----------
-         max_emb_dim (int): Maximum allowable amount of embeddings.
-         n_cont_features (int): How many continuous features to concatenate onto the categorical features.
-         cat_feat_origin_cards (list): Categorical features to turn into embeddings.
-         norm_class: What normalization to use for continuous features.
-         p_dropout (float): How much dropout to apply.
-         drop_whole_embeddings (bool): If True, dropout pretends the embedding was a missing value. If false, dropout sets embed features to 0
-         one_hot_embeddings (bool): If True, one-hot encode variables whose cardinality is < max_emb_dim.
-         cat_initializers (dict): Structure to hold the initial embeddings for categorical features.
-         """
-         self.max_emb_dim = max_emb_dim
-         self.n_cont_features = n_cont_features
-         self.cat_feat_origin_cards = cat_feat_origin_cards
-         self.norm_class = nn.__dict__[norm_class_name]
-
-         self.p_dropout = p_dropout
-         self.drop_whole_embeddings = drop_whole_embeddings
-         self.one_hot_embeddings = one_hot_embeddings
-
-         self.cat_initializers = nn.ModuleDict()
-
-         from .tab_transformer_encoder import EmbeddingInitializer
-
-         if isinstance(self.cat_feat_origin_cards, list):
-             for col_name, card in self.cat_feat_origin_cards:
-                 self.cat_initializers[col_name] = EmbeddingInitializer(
-                     card, max_emb_dim, p_dropout, drop_whole_embeddings=drop_whole_embeddings, one_hot=one_hot_embeddings
-                 )
-             self.init_feat_dim = sum(i.emb_dim for i in self.cat_initializers.values()) + self.n_cont_features
-
-     def forward(self, input):
-         raise NotImplementedError
-
-     def get_norm(self, num_feats):
-         return self.norm_class(num_feats)
-
-     def pred_from_output(self, output):
-         return output.max(dim=1, keepdim=True)[1]
@@ -1,183 +0,0 @@
- # Code in this script based on work by Milan Cvitkovic,
- # Xin Huang, Ashish Khetan and Zohar Karnin.
-
- import copy
-
- import torch
- import torch.nn as nn
-
- from .tab_model_base import TabModelBase
-
-
- class TabTransformer(TabModelBase):
-     """
-     Transformer model for tabular data, can also be used for semi-supervised learning.
-     This is the internal transformer model embedding that will have further fully connected layers (TabNet) to
-     actually produce predictions.
-     """
-
-     def __init__(
-         self,
-         n_cont_embeddings,
-         n_layers,
-         n_heads,
-         hidden_dim,
-         tab_readout,
-         column_embedding,
-         orig_emb_resid,
-         fix_attention,
-         n_shared_embs=8,
-         shared_embedding_added=False,
-         **kwargs,
-     ):
-         super().__init__(
-             n_cont_features=kwargs["n_cont_features"],
-             norm_class_name=kwargs["norm_class_name"],
-             cat_feat_origin_cards=kwargs["cat_feat_origin_cards"],
-             max_emb_dim=kwargs["max_emb_dim"],
-             p_dropout=kwargs["p_dropout"],
-             one_hot_embeddings=kwargs["one_hot_embeddings"],
-             drop_whole_embeddings=kwargs["drop_whole_embeddings"],
-         )
-
-         from .modified_transformer import TransformerEncoderLayerModified
-
-         self.n_cont_embeddings = n_cont_embeddings
-         self.hidden_dim = hidden_dim
-         self.readout = tab_readout
-         self.orig_emb_resid = orig_emb_resid
-
-         # Overwriting some TabModelBase options
-
-         self.cat_initializers = nn.ModuleDict()
-         if isinstance(self.cat_feat_origin_cards, list):
-             self.n_embeddings = len(self.cat_feat_origin_cards) + (n_cont_embeddings if self.n_cont_features else 0)
-         else:
-             self.n_embeddings = None
-
-         self.cat_initializers = nn.ModuleDict()
-
-         from .tab_transformer_encoder import EmbeddingInitializer
-
-         for col_name, card in self.cat_feat_origin_cards:
-             self.cat_initializers[col_name] = EmbeddingInitializer(
-                 num_embeddings=card,
-                 max_emb_dim=self.max_emb_dim,
-                 p_dropout=self.p_dropout,
-                 minimize_emb_dim=False,
-                 drop_whole_embeddings=self.drop_whole_embeddings,
-                 one_hot=False,
-                 out_dim=self.hidden_dim,
-                 shared_embedding=column_embedding,
-                 n_shared_embs=n_shared_embs,
-                 shared_embedding_added=shared_embedding_added,
-             )
-         if self.n_cont_features:
-             self.cont_norm = self.get_norm(self.n_cont_features)
-             self.cont_initializer = nn.Linear(self.n_cont_features, hidden_dim * n_cont_embeddings)
-             self.cont_init_norm = self.get_norm(hidden_dim * n_cont_embeddings)
-
-         if self.readout == "readout_emb":
-             self.readout_emb = nn.Parameter(torch.zeros(1, hidden_dim).uniform_(-1, 1))  # We do the readout from a learned embedding
-             self.n_embeddings += 1
-
-         if fix_attention is True:
-             self.n_cat_embeddings = len(self.cat_feat_origin_cards)
-             self.tfmr_layers = nn.ModuleList(
-                 [
-                     TransformerEncoderLayerModified(
-                         d_model=hidden_dim,
-                         n_cat_embeddings=self.n_cat_embeddings,
-                         nhead=n_heads,
-                         dim_feedforward=4 * hidden_dim,
-                         dropout=self.p_dropout,
-                         activation="gelu",
-                     )
-                     for _ in range(n_layers)
-                 ]
-             )
-         else:
-             self.tfmr_layers = nn.ModuleList(
-                 [
-                     nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=n_heads, dim_feedforward=4 * hidden_dim, dropout=self.p_dropout, activation="gelu")
-                     for _ in range(n_layers)
-                 ]
-             )
-
-     def init_input(self, input):
-         feats = [init(input[:, i]) for i, init in enumerate(self.cat_initializers.values())]
-
-         if self.readout == "readout_emb":
-             readout_emb = self.readout_emb.expand_as(feats[0])
-             feat_embs = torch.stack([readout_emb] + feats, dim=0)  # (n_feat_embeddings + 1) x batch x hidden_dim
-         else:
-             feat_embs = torch.stack(feats, dim=0)  # n_feat_embeddings x batch x hidden_dim
-         return feat_embs
-
-     def run_tfmr(self, feat_embs):
-         orig_feat_embs = feat_embs
-         all_feat_embs = [feat_embs]
-         for layer in self.tfmr_layers:
-             feat_embs = layer(feat_embs)
-             all_feat_embs.append(feat_embs)
-             if self.orig_emb_resid:
-                 feat_embs = feat_embs + orig_feat_embs
-
-         if self.readout == "readout_emb":
-             out = self.fc_out(feat_embs[0])
-         elif self.readout == "mean":
-             out = torch.mean(feat_embs, dim=0)
-             out = self.fc_out(out)
-         elif self.readout == "concat_pool":
-             all_feat_embs = torch.cat(all_feat_embs, dim=0)
-
-             max = all_feat_embs.max(dim=0).values
-             mean = all_feat_embs.mean(dim=0)
-             last_layer = feat_embs.transpose(0, 1).reshape(feat_embs.shape[1], -1)
-             out = torch.cat((last_layer, max, mean), dim=1)
-             out = self.fc_out(out)
-         elif self.readout == "concat_pool_all":
-             feat_embs_all_layers = []
-             for each_feat_embs in [all_feat_embs[0], all_feat_embs[-1]]:
-                 feat_embs_all_layers.append(each_feat_embs.transpose(0, 1).reshape(each_feat_embs.shape[1], -1))
-
-             all_feat_embs = torch.cat(all_feat_embs, dim=0)
-             max = all_feat_embs.max(dim=0).values
-             mean = all_feat_embs.mean(dim=0)
-
-             feat_embs_all_layers.append(max)
-             feat_embs_all_layers.append(mean)
-             out = torch.cat(feat_embs_all_layers, dim=1)
-             out = self.fc_out(out)
-         elif self.readout == "concat_pool_add":
-             orig_feat_embs_cp = copy.deepcopy(orig_feat_embs.detach())
-             # ce_dim = orig_feat_embs_cp.shape[-1]//8
-             # orig_feat_embs_cp[:, :, ce_dim:] = 0
-
-             last_layer = feat_embs.transpose(0, 1).reshape(feat_embs.shape[1], -1)
-             last_layer += orig_feat_embs_cp.transpose(0, 1).reshape(orig_feat_embs_cp.shape[1], -1)
-
-             all_feat_embs = torch.cat(all_feat_embs, dim=0)
-             max = all_feat_embs.max(dim=0).values
-             mean = all_feat_embs.mean(dim=0)
-
-             out = torch.cat([last_layer, max, mean], dim=1)
-
-         elif self.readout == "all_feat_embs":
-             out = feat_embs
-
-         elif self.readout == "mean_feat_embs":
-             out = feat_embs.mean(dim=0)
-
-         elif self.readout == "none":
-             out = feat_embs.transpose(1, 0)
-
-         return out
-
-     def forward(self, input):
-         """
-         Returns logits for output classes
-         """
-         feat_embs = self.init_input(input)
-         out = self.run_tfmr(feat_embs)
-         return out