masamlp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- masamlp-0.1.0/.gitignore +12 -0
- masamlp-0.1.0/CHANGELOG.md +46 -0
- masamlp-0.1.0/LICENSE +21 -0
- masamlp-0.1.0/PKG-INFO +201 -0
- masamlp-0.1.0/README.md +169 -0
- masamlp-0.1.0/benchmarks/README.md +23 -0
- masamlp-0.1.0/pyproject.toml +59 -0
- masamlp-0.1.0/src/masamlp/__init__.py +44 -0
- masamlp-0.1.0/src/masamlp/classifier.py +168 -0
- masamlp-0.1.0/src/masamlp/core/__init__.py +14 -0
- masamlp-0.1.0/src/masamlp/core/device.py +86 -0
- masamlp-0.1.0/src/masamlp/core/ensemble.py +272 -0
- masamlp-0.1.0/src/masamlp/core/metrics.py +176 -0
- masamlp-0.1.0/src/masamlp/core/objectives.py +330 -0
- masamlp-0.1.0/src/masamlp/core/serialization.py +179 -0
- masamlp-0.1.0/src/masamlp/core/trainer.py +379 -0
- masamlp-0.1.0/src/masamlp/data/__init__.py +4 -0
- masamlp-0.1.0/src/masamlp/data/dataset.py +44 -0
- masamlp-0.1.0/src/masamlp/data/preprocessing.py +320 -0
- masamlp-0.1.0/src/masamlp/models/__init__.py +128 -0
- masamlp-0.1.0/src/masamlp/models/base.py +299 -0
- masamlp-0.1.0/src/masamlp/models/danet.py +113 -0
- masamlp-0.1.0/src/masamlp/models/ft_transformer.py +117 -0
- masamlp-0.1.0/src/masamlp/models/gandalf.py +126 -0
- masamlp-0.1.0/src/masamlp/models/grn.py +57 -0
- masamlp-0.1.0/src/masamlp/models/layers.py +151 -0
- masamlp-0.1.0/src/masamlp/models/lnn.py +73 -0
- masamlp-0.1.0/src/masamlp/models/modernnca.py +143 -0
- masamlp-0.1.0/src/masamlp/models/realmlp.py +165 -0
- masamlp-0.1.0/src/masamlp/models/resnet.py +57 -0
- masamlp-0.1.0/src/masamlp/models/tab_transformer.py +90 -0
- masamlp-0.1.0/src/masamlp/models/tabr.py +180 -0
- masamlp-0.1.0/src/masamlp/presets.py +93 -0
- masamlp-0.1.0/src/masamlp/py.typed +0 -0
- masamlp-0.1.0/src/masamlp/regressor.py +158 -0
- masamlp-0.1.0/src/masamlp/sklearn.py +361 -0
- masamlp-0.1.0/src/masamlp/utils/__init__.py +4 -0
- masamlp-0.1.0/src/masamlp/utils/random.py +17 -0
- masamlp-0.1.0/src/masamlp/utils/validation.py +46 -0
- masamlp-0.1.0/tests/conftest.py +82 -0
- masamlp-0.1.0/tests/test_custom_metric.py +55 -0
- masamlp-0.1.0/tests/test_custom_objective.py +112 -0
- masamlp-0.1.0/tests/test_device.py +80 -0
- masamlp-0.1.0/tests/test_embeddings.py +64 -0
- masamlp-0.1.0/tests/test_ensemble.py +141 -0
- masamlp-0.1.0/tests/test_estimators.py +137 -0
- masamlp-0.1.0/tests/test_gandalf_grn.py +108 -0
- masamlp-0.1.0/tests/test_ghost_batchnorm.py +84 -0
- masamlp-0.1.0/tests/test_metrics.py +64 -0
- masamlp-0.1.0/tests/test_models.py +133 -0
- masamlp-0.1.0/tests/test_modernnca.py +84 -0
- masamlp-0.1.0/tests/test_objectives.py +80 -0
- masamlp-0.1.0/tests/test_preprocessing.py +135 -0
- masamlp-0.1.0/tests/test_realmlp.py +173 -0
- masamlp-0.1.0/tests/test_sample_weight.py +87 -0
- masamlp-0.1.0/tests/test_serialization.py +61 -0
- masamlp-0.1.0/tests/test_sklearn_compat.py +57 -0
- masamlp-0.1.0/tests/test_tabr.py +63 -0
- masamlp-0.1.0/tests/test_trainer.py +89 -0
- masamlp-0.1.0/tests/test_transformers.py +74 -0
masamlp-0.1.0/.gitignore
ADDED
|
masamlp-0.1.0/CHANGELOG.md
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.1.0 (2026-07-02)
|
|
4
|
+
|
|
5
|
+
Initial release.
|
|
6
|
+
|
|
7
|
+
- `MasaRegressor` / `MasaClassifier` sklearn-compatible estimators with
|
|
8
|
+
`fit(X, y, sample_weight=..., eval_set=...)`, early stopping on any metric,
|
|
9
|
+
and directory-format save/load.
|
|
10
|
+
- Models: `resnet` and `ft_transformer` (Gorishniy et al. 2021), `realmlp`
|
|
11
|
+
(Holzmüller et al. 2024, TD-S architecture with the full training recipe
|
|
12
|
+
in `masamlp.realmlp_params`), `tab_transformer` (Huang et al. 2020),
|
|
13
|
+
`danet` (Chen et al. AAAI 2022), `tabr` (retrieval-augmented, Gorishniy
|
|
14
|
+
et al. 2023), `modernnca` (Ye et al. 2024, soft-nearest-neighbor),
|
|
15
|
+
`gandalf` (Joseph & Raj 2022, GFLU with t-softmax feature masks),
|
|
16
|
+
`grn` (stacked TFT Gated Residual Networks), and `lnn` (experimental
|
|
17
|
+
CfC-based liquid network for static tabular data), plus a
|
|
18
|
+
`register_model` hook for custom architectures (token-based models via
|
|
19
|
+
`embedding_kind = "tokens"`).
|
|
20
|
+
- `n_ens` seed ensembling on both estimators (pytabkit semantics: members
|
|
21
|
+
seeded `random_state + i`, predictions averaged on the transformed scale);
|
|
22
|
+
save/load stores all members. `ens_mode="vectorized"` trains all members
|
|
23
|
+
in one vmapped forward/backward (torch.func) for BatchNorm-free models,
|
|
24
|
+
with per-member best-epoch tracking.
|
|
25
|
+
- Full RealMLP-TD recipe via `masamlp.realmlp_td_params(task)`: parametric
|
|
26
|
+
activations (`act_lr_factor`), flat_cos-scheduled dropout and weight decay
|
|
27
|
+
(`weight_decay_schedule`, zero decay on biases), PBLD embedding lr factor,
|
|
28
|
+
and `cat_encoding="hybrid"` (one-hot up to 9 categories, embeddings of
|
|
29
|
+
size 8 above).
|
|
30
|
+
- RealMLP insights as composable estimator options: `numeric_scaler="rssc"`,
|
|
31
|
+
`cat_encoding="onehot"`, numeric embedding zoo
|
|
32
|
+
(`num_embedding="pbld"/"plr"/"plr-lite"/"pl"/"periodic"`), learnable input scaling
|
|
33
|
+
(`num_scaling`), `lr_scheduler="coslog4"` with per-group learning-rate
|
|
34
|
+
factors, `optimizer_betas`, and regression `clip_predictions`.
|
|
35
|
+
- Objective plugin system: per-sample torch losses with a uniform
|
|
36
|
+
sample-weight contract; built-ins for regression (squared error, MAE,
|
|
37
|
+
Huber, quantile, Poisson) and classification (binary logistic, multiclass
|
|
38
|
+
softmax, both with label smoothing).
|
|
39
|
+
- Metric plugin system ported from repleafgbm (`get_metric` / `make_metric`).
|
|
40
|
+
- Built-in preprocessing: quantile/standard/robust numeric scaling, median
|
|
41
|
+
imputation, categorical index encoding with embeddings.
|
|
42
|
+
- Device support: CPU, CUDA (bf16 AMP, optional `torch.compile`), and MPS,
|
|
43
|
+
behind `device="auto"`. Verified on Colab T4 (docs/verdicts/).
|
|
44
|
+
- DANet made GPU-practical (KI-009): the grouped 1x1 conv is computed as a
|
|
45
|
+
batched einsum over the same parameters and GhostBatchNorm's training
|
|
46
|
+
path is fused — 50x faster on T4, 14x faster on CPU, bit-for-bit state_dict compatible.
|
masamlp-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Masaya Kawamata
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
masamlp-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: masamlp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Extensible tabular deep learning: TabularResNet, DANet, and TabularLNN with first-class sample_weight, custom objectives, and custom metrics.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Matapanino/masamlp
|
|
6
|
+
Project-URL: Repository, https://github.com/Matapanino/masamlp
|
|
7
|
+
Author-email: Masaya Kawamata <mkawamata038@gmail.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: danet,deep-learning,machine-learning,pytorch,resnet,tabular
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: numpy>=1.23
|
|
18
|
+
Requires-Dist: pandas>=1.5
|
|
19
|
+
Requires-Dist: scikit-learn>=1.2
|
|
20
|
+
Requires-Dist: torch>=2.1
|
|
21
|
+
Provides-Extra: bench
|
|
22
|
+
Requires-Dist: lightgbm>=4.0; extra == 'bench'
|
|
23
|
+
Requires-Dist: optuna>=3; extra == 'bench'
|
|
24
|
+
Requires-Dist: pytabkit>=1.5; extra == 'bench'
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest-cov>=4; extra == 'dev'
|
|
27
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
29
|
+
Provides-Extra: docs
|
|
30
|
+
Requires-Dist: pdoc>=14; extra == 'docs'
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
# masaMLP
|
|
34
|
+
|
|
35
|
+

|
|
36
|
+

|
|
37
|
+

|
|
38
|
+
|
|
39
|
+
**Extensible tabular deep learning** — TabularResNet, DANet, and TabularLNN
|
|
40
|
+
behind sklearn-compatible estimators with first-class **sample_weight**,
|
|
41
|
+
**custom objectives**, **custom metrics**, and **early stopping on any
|
|
42
|
+
metric**. The sibling library of
|
|
43
|
+
[repleafgbm](https://github.com/Matapanino/repleafgbm) (same author, same API
|
|
44
|
+
philosophy), for the neural side of tabular ML.
|
|
45
|
+
|
|
46
|
+
> **Status: alpha (0.1.x).** Built with heavy use of
|
|
47
|
+
> [Claude Code](https://claude.com/claude-code) (coding and architecture
|
|
48
|
+
> design).
|
|
49
|
+
|
|
50
|
+
## Why masaMLP
|
|
51
|
+
|
|
52
|
+
Excellent tabular DL libraries exist — [pytabkit](https://github.com/dholzmueller/pytabkit)
|
|
53
|
+
ships state-of-the-art models like RealMLP and TabM, and
|
|
54
|
+
[rtdl](https://github.com/yandex-research/rtdl) provides reference modules.
|
|
55
|
+
What they don't make easy is *extension*: `sample_weight` in `fit`, custom
|
|
56
|
+
training losses, custom evaluation metrics, and early stopping driven by
|
|
57
|
+
them. masaMLP is built around exactly those hooks:
|
|
58
|
+
|
|
59
|
+
- **`fit(X, y, sample_weight=..., eval_set=...)`** — LightGBM-style, sklearn
|
|
60
|
+
compatible. Weights flow through a single reduction
|
|
61
|
+
`(loss * w).sum() / w.sum()` that every objective shares.
|
|
62
|
+
- **Custom objectives** are per-sample torch losses — a plain function (or
|
|
63
|
+
`nn.Module` with trainable parameters). Because the trainer owns the
|
|
64
|
+
weighted reduction, your loss gets correct `sample_weight` and
|
|
65
|
+
`class_weight` handling for free.
|
|
66
|
+
- **Custom metrics** are plain NumPy callables via `make_metric`, and any of
|
|
67
|
+
them (minimize or maximize) can drive early stopping with best-epoch weight
|
|
68
|
+
restoration.
|
|
69
|
+
- **Multiclass, multioutput regression, class_weight, label smoothing**
|
|
70
|
+
supported natively; built-in preprocessing (quantile scaling, missing
|
|
71
|
+
values, categorical embeddings) so DataFrames go straight into `fit`.
|
|
72
|
+
- **CPU / CUDA / MPS** behind `device="auto"`: device-resident tensors with
|
|
73
|
+
no DataLoader overhead, automatic full-batch mode for small data, bf16 AMP
|
|
74
|
+
on CUDA, opt-in `torch.compile` with eager fallback.
|
|
75
|
+
|
|
76
|
+
masaMLP deliberately does *not* try to re-benchmark the field — see
|
|
77
|
+
[docs/attribution.md](docs/attribution.md) for the research and libraries it
|
|
78
|
+
builds on.
|
|
79
|
+
|
|
80
|
+
## Models
|
|
81
|
+
|
|
82
|
+
| name | source | notes |
|
|
83
|
+
|---|---|---|
|
|
84
|
+
| `resnet` | Gorishniy et al. 2021 (arXiv:2106.11959) | default; strong baseline |
|
|
85
|
+
| `realmlp` | Holzmüller et al. 2024 (arXiv:2407.04491) | RealMLP-TD-S architecture (scaling layer, NTP linear layers, SELU/Mish); pair with `masamlp.realmlp_params(task)` for the full TD-S training recipe |
|
|
86
|
+
| `ft_transformer` | Gorishniy et al. 2021 (arXiv:2106.11959) | feature tokens + [CLS] + PreNorm/ReGLU transformer, per the rtdl reference |
|
|
87
|
+
| `tab_transformer` | Huang et al. 2020 (arXiv:2012.06678) | transformer over categorical tokens; numerics bypass (or embed via `num_embedding`) |
|
|
88
|
+
| `danet` | Chen et al. AAAI 2022 (arXiv:2112.02962) | Abstract Layers with learnable sparse feature groups (in-house entmax15) |
|
|
89
|
+
| `tabr` | Gorishniy et al. 2023 (arXiv:2307.14338) | retrieval-augmented: nearest training rows are aggregated into each prediction |
|
|
90
|
+
| `modernnca` | Ye et al. 2024 (arXiv:2407.03257) | soft-nearest-neighbor aggregation with stochastic candidate sampling; pairs well with `num_embedding="plr-lite"` |
|
|
91
|
+
| `gandalf` | Joseph & Raj 2022 (arXiv:2207.08548) | GFLU stages: learnable sparse feature masks (t-softmax) with GRU-style gating; exposes `feature_importances()` |
|
|
92
|
+
| `grn` | GRN blocks from TFT, Lim et al. 2021 (arXiv:1912.09363) | stack of Gated Residual Networks over embedded features (masaMLP's own composition) |
|
|
93
|
+
| `lnn` | CfC cells, Hasani et al. 2022 | **experimental** liquid-network adaptation for static tabular data — see [docs/lnn.md](docs/lnn.md) |
|
|
94
|
+
|
|
95
|
+
Third-party architectures plug in with `register_model` and get the whole
|
|
96
|
+
estimator surface (weights, objectives, metrics, early stopping) for free.
|
|
97
|
+
|
|
98
|
+
### RealMLP insights are composable options
|
|
99
|
+
|
|
100
|
+
The tricks from the RealMLP paper are estimator-level options usable with
|
|
101
|
+
*any* model (`lnn` included), not baked into one architecture:
|
|
102
|
+
|
|
103
|
+
- `numeric_scaler="rssc"` — robust scale + smooth clip preprocessing
|
|
104
|
+
- `cat_encoding="onehot"` — RealMLP-style one-hot (binary → ±1, missing → 0)
|
|
105
|
+
- `num_embedding="pbld" | "plr" | "plr-lite" | "pl" | "periodic"` — the
|
|
106
|
+
numeric embedding zoo (arXiv:2203.05556 + PBLD); token models
|
|
107
|
+
(`ft_transformer`, `tab_transformer`) use the same options as feature
|
|
108
|
+
tokenizers
|
|
109
|
+
- `model_params={"num_scaling": True}` — learnable per-feature input scale
|
|
110
|
+
- `lr_scheduler="coslog4"`, `optimizer_betas=(0.9, 0.95)` — the training
|
|
111
|
+
schedule
|
|
112
|
+
- `clip_predictions=True` (regressor) — clip to the observed target range
|
|
113
|
+
- `n_ens=k` — seed ensembling as in pytabkit's RealMLP: k members trained
|
|
114
|
+
with seeds `random_state + i`, predictions averaged on the probability /
|
|
115
|
+
value scale; works with every model including the retrieval ones.
|
|
116
|
+
`ens_mode="vectorized"` trains all members in one vmapped forward/backward
|
|
117
|
+
(`torch.func`) for BatchNorm-free models — pytabkit's speed trick
|
|
118
|
+
- `weight_decay_schedule="flat_cos"` — RealMLP-TD's scheduled weight decay
|
|
119
|
+
(param groups can opt out, e.g. biases)
|
|
120
|
+
- `masamlp.realmlp_td_params(task)` — the **full RealMLP-TD recipe**:
|
|
121
|
+
parametric activations, flat_cos-scheduled dropout and weight decay, PBLD
|
|
122
|
+
embeddings with their own lr factor, and hybrid categorical encoding
|
|
123
|
+
(one-hot ≤ 9 categories, embeddings above)
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from masamlp import MasaClassifier, realmlp_params
|
|
127
|
+
|
|
128
|
+
clf = MasaClassifier(**realmlp_params("classification")) # the TD-S recipe
|
|
129
|
+
clf = MasaClassifier(**{**realmlp_params("classification"),
|
|
130
|
+
"num_embedding": "pbld"}) # toward RealMLP-TD
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Install
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
pip install masamlp # torch, numpy, pandas, scikit-learn
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Quickstart
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
import numpy as np
|
|
143
|
+
from masamlp import MasaClassifier, make_metric
|
|
144
|
+
|
|
145
|
+
def f1(y_true, y_proba):
|
|
146
|
+
pred = y_proba >= 0.5
|
|
147
|
+
tp = np.sum(pred & (y_true == 1))
|
|
148
|
+
return 2 * tp / max(pred.sum() + (y_true == 1).sum(), 1)
|
|
149
|
+
|
|
150
|
+
clf = MasaClassifier(
|
|
151
|
+
model="resnet",
|
|
152
|
+
eval_metric=make_metric(f1, name="f1", minimize=False),
|
|
153
|
+
early_stopping_rounds=15,
|
|
154
|
+
class_weight="balanced",
|
|
155
|
+
)
|
|
156
|
+
clf.fit(X_train, y_train, sample_weight=w_train, eval_set=[(X_val, y_val)])
|
|
157
|
+
proba = clf.predict_proba(X_test)
|
|
158
|
+
print(clf.best_iteration_, clf.best_score_, clf.evals_result_["valid_0"]["f1"][:3])
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
Custom objective (regression, asymmetric loss):
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
import torch
|
|
165
|
+
from masamlp import MasaRegressor
|
|
166
|
+
|
|
167
|
+
def asymmetric_mse(y_true, raw_pred): # -> per-sample (n,) tensor
|
|
168
|
+
err = raw_pred - y_true # raw_pred: (n, out_dim)
|
|
169
|
+
return torch.where(err < 0, 4.0 * err**2, err**2).mean(dim=1)
|
|
170
|
+
|
|
171
|
+
reg = MasaRegressor(model="danet", objective=asymmetric_mse)
|
|
172
|
+
reg.fit(X, y, sample_weight=w) # weights just work
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Save/load is a plain directory (`manifest.json` + tensors, loaded with
|
|
176
|
+
`weights_only=True` — no pickle execution):
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
reg.save_model("model_dir")
|
|
180
|
+
reg2 = MasaRegressor.load_model("model_dir")
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## Devices
|
|
184
|
+
|
|
185
|
+
`device="auto"` resolves cuda > mps > cpu. CUDA gets bf16 AMP by default and
|
|
186
|
+
optional `compile=True`; MPS and CPU train in float32. Details and caveats:
|
|
187
|
+
[docs/devices.md](docs/devices.md).
|
|
188
|
+
|
|
189
|
+
## Development
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
pip install -e ".[dev]"
|
|
193
|
+
bash scripts/check.sh # ruff + pytest + examples/quickstart.py
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
Development rules live in [CLAUDE.md](CLAUDE.md); roadmap in
|
|
197
|
+
[docs/roadmap.md](docs/roadmap.md).
|
|
198
|
+
|
|
199
|
+
## License
|
|
200
|
+
|
|
201
|
+
MIT. Architecture attributions: [docs/attribution.md](docs/attribution.md).
|
masamlp-0.1.0/README.md
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# masaMLP
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+

|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
**Extensible tabular deep learning** — TabularResNet, DANet, and TabularLNN
|
|
8
|
+
behind sklearn-compatible estimators with first-class **sample_weight**,
|
|
9
|
+
**custom objectives**, **custom metrics**, and **early stopping on any
|
|
10
|
+
metric**. The sibling library of
|
|
11
|
+
[repleafgbm](https://github.com/Matapanino/repleafgbm) (same author, same API
|
|
12
|
+
philosophy), for the neural side of tabular ML.
|
|
13
|
+
|
|
14
|
+
> **Status: alpha (0.1.x).** Built with heavy use of
|
|
15
|
+
> [Claude Code](https://claude.com/claude-code) (coding and architecture
|
|
16
|
+
> design).
|
|
17
|
+
|
|
18
|
+
## Why masaMLP
|
|
19
|
+
|
|
20
|
+
Excellent tabular DL libraries exist — [pytabkit](https://github.com/dholzmueller/pytabkit)
|
|
21
|
+
ships state-of-the-art models like RealMLP and TabM, and
|
|
22
|
+
[rtdl](https://github.com/yandex-research/rtdl) provides reference modules.
|
|
23
|
+
What they don't make easy is *extension*: `sample_weight` in `fit`, custom
|
|
24
|
+
training losses, custom evaluation metrics, and early stopping driven by
|
|
25
|
+
them. masaMLP is built around exactly those hooks:
|
|
26
|
+
|
|
27
|
+
- **`fit(X, y, sample_weight=..., eval_set=...)`** — LightGBM-style, sklearn
|
|
28
|
+
compatible. Weights flow through a single reduction
|
|
29
|
+
`(loss * w).sum() / w.sum()` that every objective shares.
|
|
30
|
+
- **Custom objectives** are per-sample torch losses — a plain function (or
|
|
31
|
+
`nn.Module` with trainable parameters). Because the trainer owns the
|
|
32
|
+
weighted reduction, your loss gets correct `sample_weight` and
|
|
33
|
+
`class_weight` handling for free.
|
|
34
|
+
- **Custom metrics** are plain NumPy callables via `make_metric`, and any of
|
|
35
|
+
them (minimize or maximize) can drive early stopping with best-epoch weight
|
|
36
|
+
restoration.
|
|
37
|
+
- **Multiclass, multioutput regression, class_weight, label smoothing**
|
|
38
|
+
supported natively; built-in preprocessing (quantile scaling, missing
|
|
39
|
+
values, categorical embeddings) so DataFrames go straight into `fit`.
|
|
40
|
+
- **CPU / CUDA / MPS** behind `device="auto"`: device-resident tensors with
|
|
41
|
+
no DataLoader overhead, automatic full-batch mode for small data, bf16 AMP
|
|
42
|
+
on CUDA, opt-in `torch.compile` with eager fallback.
|
|
43
|
+
|
|
44
|
+
masaMLP deliberately does *not* try to re-benchmark the field — see
|
|
45
|
+
[docs/attribution.md](docs/attribution.md) for the research and libraries it
|
|
46
|
+
builds on.
|
|
47
|
+
|
|
48
|
+
## Models
|
|
49
|
+
|
|
50
|
+
| name | source | notes |
|
|
51
|
+
|---|---|---|
|
|
52
|
+
| `resnet` | Gorishniy et al. 2021 (arXiv:2106.11959) | default; strong baseline |
|
|
53
|
+
| `realmlp` | Holzmüller et al. 2024 (arXiv:2407.04491) | RealMLP-TD-S architecture (scaling layer, NTP linear layers, SELU/Mish); pair with `masamlp.realmlp_params(task)` for the full TD-S training recipe |
|
|
54
|
+
| `ft_transformer` | Gorishniy et al. 2021 (arXiv:2106.11959) | feature tokens + [CLS] + PreNorm/ReGLU transformer, per the rtdl reference |
|
|
55
|
+
| `tab_transformer` | Huang et al. 2020 (arXiv:2012.06678) | transformer over categorical tokens; numerics bypass (or embed via `num_embedding`) |
|
|
56
|
+
| `danet` | Chen et al. AAAI 2022 (arXiv:2112.02962) | Abstract Layers with learnable sparse feature groups (in-house entmax15) |
|
|
57
|
+
| `tabr` | Gorishniy et al. 2023 (arXiv:2307.14338) | retrieval-augmented: nearest training rows are aggregated into each prediction |
|
|
58
|
+
| `modernnca` | Ye et al. 2024 (arXiv:2407.03257) | soft-nearest-neighbor aggregation with stochastic candidate sampling; pairs well with `num_embedding="plr-lite"` |
|
|
59
|
+
| `gandalf` | Joseph & Raj 2022 (arXiv:2207.08548) | GFLU stages: learnable sparse feature masks (t-softmax) with GRU-style gating; exposes `feature_importances()` |
|
|
60
|
+
| `grn` | GRN blocks from TFT, Lim et al. 2021 (arXiv:1912.09363) | stack of Gated Residual Networks over embedded features (masaMLP's own composition) |
|
|
61
|
+
| `lnn` | CfC cells, Hasani et al. 2022 | **experimental** liquid-network adaptation for static tabular data — see [docs/lnn.md](docs/lnn.md) |
|
|
62
|
+
|
|
63
|
+
Third-party architectures plug in with `register_model` and get the whole
|
|
64
|
+
estimator surface (weights, objectives, metrics, early stopping) for free.
|
|
65
|
+
|
|
66
|
+
### RealMLP insights are composable options
|
|
67
|
+
|
|
68
|
+
The tricks from the RealMLP paper are estimator-level options usable with
|
|
69
|
+
*any* model (`lnn` included), not baked into one architecture:
|
|
70
|
+
|
|
71
|
+
- `numeric_scaler="rssc"` — robust scale + smooth clip preprocessing
|
|
72
|
+
- `cat_encoding="onehot"` — RealMLP-style one-hot (binary → ±1, missing → 0)
|
|
73
|
+
- `num_embedding="pbld" | "plr" | "plr-lite" | "pl" | "periodic"` — the
|
|
74
|
+
numeric embedding zoo (arXiv:2203.05556 + PBLD); token models
|
|
75
|
+
(`ft_transformer`, `tab_transformer`) use the same options as feature
|
|
76
|
+
tokenizers
|
|
77
|
+
- `model_params={"num_scaling": True}` — learnable per-feature input scale
|
|
78
|
+
- `lr_scheduler="coslog4"`, `optimizer_betas=(0.9, 0.95)` — the training
|
|
79
|
+
schedule
|
|
80
|
+
- `clip_predictions=True` (regressor) — clip to the observed target range
|
|
81
|
+
- `n_ens=k` — seed ensembling as in pytabkit's RealMLP: k members trained
|
|
82
|
+
with seeds `random_state + i`, predictions averaged on the probability /
|
|
83
|
+
value scale; works with every model including the retrieval ones.
|
|
84
|
+
`ens_mode="vectorized"` trains all members in one vmapped forward/backward
|
|
85
|
+
(`torch.func`) for BatchNorm-free models — pytabkit's speed trick
|
|
86
|
+
- `weight_decay_schedule="flat_cos"` — RealMLP-TD's scheduled weight decay
|
|
87
|
+
(param groups can opt out, e.g. biases)
|
|
88
|
+
- `masamlp.realmlp_td_params(task)` — the **full RealMLP-TD recipe**:
|
|
89
|
+
parametric activations, flat_cos-scheduled dropout and weight decay, PBLD
|
|
90
|
+
embeddings with their own lr factor, and hybrid categorical encoding
|
|
91
|
+
(one-hot ≤ 9 categories, embeddings above)
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from masamlp import MasaClassifier, realmlp_params
|
|
95
|
+
|
|
96
|
+
clf = MasaClassifier(**realmlp_params("classification")) # the TD-S recipe
|
|
97
|
+
clf = MasaClassifier(**{**realmlp_params("classification"),
|
|
98
|
+
"num_embedding": "pbld"}) # toward RealMLP-TD
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Install
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
pip install masamlp # torch, numpy, pandas, scikit-learn
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Quickstart
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
import numpy as np
|
|
111
|
+
from masamlp import MasaClassifier, make_metric
|
|
112
|
+
|
|
113
|
+
def f1(y_true, y_proba):
|
|
114
|
+
pred = y_proba >= 0.5
|
|
115
|
+
tp = np.sum(pred & (y_true == 1))
|
|
116
|
+
return 2 * tp / max(pred.sum() + (y_true == 1).sum(), 1)
|
|
117
|
+
|
|
118
|
+
clf = MasaClassifier(
|
|
119
|
+
model="resnet",
|
|
120
|
+
eval_metric=make_metric(f1, name="f1", minimize=False),
|
|
121
|
+
early_stopping_rounds=15,
|
|
122
|
+
class_weight="balanced",
|
|
123
|
+
)
|
|
124
|
+
clf.fit(X_train, y_train, sample_weight=w_train, eval_set=[(X_val, y_val)])
|
|
125
|
+
proba = clf.predict_proba(X_test)
|
|
126
|
+
print(clf.best_iteration_, clf.best_score_, clf.evals_result_["valid_0"]["f1"][:3])
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Custom objective (regression, asymmetric loss):
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
import torch
|
|
133
|
+
from masamlp import MasaRegressor
|
|
134
|
+
|
|
135
|
+
def asymmetric_mse(y_true, raw_pred): # -> per-sample (n,) tensor
|
|
136
|
+
err = raw_pred - y_true # raw_pred: (n, out_dim)
|
|
137
|
+
return torch.where(err < 0, 4.0 * err**2, err**2).mean(dim=1)
|
|
138
|
+
|
|
139
|
+
reg = MasaRegressor(model="danet", objective=asymmetric_mse)
|
|
140
|
+
reg.fit(X, y, sample_weight=w) # weights just work
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Save/load is a plain directory (`manifest.json` + tensors, loaded with
|
|
144
|
+
`weights_only=True` — no pickle execution):
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
reg.save_model("model_dir")
|
|
148
|
+
reg2 = MasaRegressor.load_model("model_dir")
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## Devices
|
|
152
|
+
|
|
153
|
+
`device="auto"` resolves cuda > mps > cpu. CUDA gets bf16 AMP by default and
|
|
154
|
+
optional `compile=True`; MPS and CPU train in float32. Details and caveats:
|
|
155
|
+
[docs/devices.md](docs/devices.md).
|
|
156
|
+
|
|
157
|
+
## Development
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
pip install -e ".[dev]"
|
|
161
|
+
bash scripts/check.sh # ruff + pytest + examples/quickstart.py
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Development rules live in [CLAUDE.md](CLAUDE.md); roadmap in
|
|
165
|
+
[docs/roadmap.md](docs/roadmap.md).
|
|
166
|
+
|
|
167
|
+
## License
|
|
168
|
+
|
|
169
|
+
MIT. Architecture attributions: [docs/attribution.md](docs/attribution.md).
|
masamlp-0.1.0/benchmarks/README.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Benchmarks
|
|
2
|
+
|
|
3
|
+
Not shipped with the package; the library (`src/`) never imports anything
|
|
4
|
+
here.
|
|
5
|
+
|
|
6
|
+
- `parity_realmlp.py` — the honesty check: masamlp's RealMLP-TD-S recipe vs
|
|
7
|
+
the author's standalone reference implementation (vendored under
|
|
8
|
+
`vendor/`, MIT) on california housing and adult, with sklearn's
|
|
9
|
+
HistGradientBoosting as an anchor. Expected outcome: comparable metrics
|
|
10
|
+
(same recipe, different shuffling details), not bitwise equality.
|
|
11
|
+
- `model_zoo.py` — every registered model on the same two datasets with its
|
|
12
|
+
recommended knobs. Single seed, capped epochs, subsampled rows, no HPO:
|
|
13
|
+
a smoke-level leaderboard, not a paper-grade ranking.
|
|
14
|
+
|
|
15
|
+
Run from the repo root:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
PYTHONPATH=src python3 benchmarks/parity_realmlp.py
|
|
19
|
+
PYTHONPATH=src python3 benchmarks/model_zoo.py
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
If OpenML downloads fail with SSL errors (framework Python on macOS):
|
|
23
|
+
`export SSL_CERT_FILE=$(python3 -m certifi)`.
|
masamlp-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "masamlp"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Extensible tabular deep learning: TabularResNet, DANet, and TabularLNN with first-class sample_weight, custom objectives, and custom metrics."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Masaya Kawamata", email = "mkawamata038@gmail.com" }]
|
|
13
|
+
keywords = ["tabular", "deep-learning", "pytorch", "machine-learning", "resnet", "danet"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"torch>=2.1",
|
|
23
|
+
"numpy>=1.23",
|
|
24
|
+
"pandas>=1.5",
|
|
25
|
+
"scikit-learn>=1.2",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
# Benchmark tooling only; the library (src/) never imports these.
|
|
30
|
+
bench = ["lightgbm>=4.0", "pytabkit>=1.5", "optuna>=3"]
|
|
31
|
+
# API reference generation (scripts/build_docs.sh).
|
|
32
|
+
docs = ["pdoc>=14"]
|
|
33
|
+
dev = ["pytest>=7.0", "pytest-cov>=4", "ruff>=0.4"]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/Matapanino/masamlp"
|
|
37
|
+
Repository = "https://github.com/Matapanino/masamlp"
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["src/masamlp"]
|
|
41
|
+
# Ship the PEP 561 marker so type checkers honour the inline annotations.
|
|
42
|
+
force-include = { "src/masamlp/py.typed" = "masamlp/py.typed" }
|
|
43
|
+
|
|
44
|
+
[tool.hatch.build.targets.sdist]
|
|
45
|
+
include = ["src/masamlp", "tests", "pyproject.toml", "README.md", "LICENSE", "CHANGELOG.md"]
|
|
46
|
+
|
|
47
|
+
[tool.ruff]
|
|
48
|
+
line-length = 100
|
|
49
|
+
src = ["src", "tests"]
|
|
50
|
+
|
|
51
|
+
[tool.ruff.lint]
|
|
52
|
+
select = ["E", "F", "W", "I", "UP", "B"]
|
|
53
|
+
|
|
54
|
+
[tool.pytest.ini_options]
|
|
55
|
+
testpaths = ["tests"]
|
|
56
|
+
addopts = "-q"
|
|
57
|
+
filterwarnings = [
|
|
58
|
+
"ignore:.*torch.compile.*:UserWarning",
|
|
59
|
+
]
|
masamlp-0.1.0/src/masamlp/__init__.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""masaMLP: extensible tabular deep learning.
|
|
2
|
+
|
|
3
|
+
TabularResNet, DANet, and TabularLNN behind sklearn-compatible estimators
|
|
4
|
+
with first-class sample_weight, custom objectives, custom metrics, and early
|
|
5
|
+
stopping on any metric — the sibling library of repleafgbm.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from masamlp.classifier import MasaClassifier
|
|
9
|
+
from masamlp.core.metrics import BaseMetric, get_metric, make_metric
|
|
10
|
+
from masamlp.core.objectives import (
|
|
11
|
+
BaseObjective,
|
|
12
|
+
BinaryLogistic,
|
|
13
|
+
Huber,
|
|
14
|
+
MulticlassSoftmax,
|
|
15
|
+
PoissonRegression,
|
|
16
|
+
Quantile,
|
|
17
|
+
get_objective,
|
|
18
|
+
make_objective,
|
|
19
|
+
)
|
|
20
|
+
from masamlp.models import register_model
|
|
21
|
+
from masamlp.presets import realmlp_params, realmlp_td_params
|
|
22
|
+
from masamlp.regressor import MasaRegressor
|
|
23
|
+
|
|
24
|
+
__version__ = "0.1.0"
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"MasaRegressor",
|
|
28
|
+
"MasaClassifier",
|
|
29
|
+
"BaseMetric",
|
|
30
|
+
"get_metric",
|
|
31
|
+
"make_metric",
|
|
32
|
+
"BaseObjective",
|
|
33
|
+
"get_objective",
|
|
34
|
+
"make_objective",
|
|
35
|
+
"Huber",
|
|
36
|
+
"Quantile",
|
|
37
|
+
"PoissonRegression",
|
|
38
|
+
"BinaryLogistic",
|
|
39
|
+
"MulticlassSoftmax",
|
|
40
|
+
"register_model",
|
|
41
|
+
"realmlp_params",
|
|
42
|
+
"realmlp_td_params",
|
|
43
|
+
"__version__",
|
|
44
|
+
]
|