masamlp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. masamlp-0.1.0/.gitignore +12 -0
  2. masamlp-0.1.0/CHANGELOG.md +46 -0
  3. masamlp-0.1.0/LICENSE +21 -0
  4. masamlp-0.1.0/PKG-INFO +201 -0
  5. masamlp-0.1.0/README.md +169 -0
  6. masamlp-0.1.0/benchmarks/README.md +23 -0
  7. masamlp-0.1.0/pyproject.toml +59 -0
  8. masamlp-0.1.0/src/masamlp/__init__.py +44 -0
  9. masamlp-0.1.0/src/masamlp/classifier.py +168 -0
  10. masamlp-0.1.0/src/masamlp/core/__init__.py +14 -0
  11. masamlp-0.1.0/src/masamlp/core/device.py +86 -0
  12. masamlp-0.1.0/src/masamlp/core/ensemble.py +272 -0
  13. masamlp-0.1.0/src/masamlp/core/metrics.py +176 -0
  14. masamlp-0.1.0/src/masamlp/core/objectives.py +330 -0
  15. masamlp-0.1.0/src/masamlp/core/serialization.py +179 -0
  16. masamlp-0.1.0/src/masamlp/core/trainer.py +379 -0
  17. masamlp-0.1.0/src/masamlp/data/__init__.py +4 -0
  18. masamlp-0.1.0/src/masamlp/data/dataset.py +44 -0
  19. masamlp-0.1.0/src/masamlp/data/preprocessing.py +320 -0
  20. masamlp-0.1.0/src/masamlp/models/__init__.py +128 -0
  21. masamlp-0.1.0/src/masamlp/models/base.py +299 -0
  22. masamlp-0.1.0/src/masamlp/models/danet.py +113 -0
  23. masamlp-0.1.0/src/masamlp/models/ft_transformer.py +117 -0
  24. masamlp-0.1.0/src/masamlp/models/gandalf.py +126 -0
  25. masamlp-0.1.0/src/masamlp/models/grn.py +57 -0
  26. masamlp-0.1.0/src/masamlp/models/layers.py +151 -0
  27. masamlp-0.1.0/src/masamlp/models/lnn.py +73 -0
  28. masamlp-0.1.0/src/masamlp/models/modernnca.py +143 -0
  29. masamlp-0.1.0/src/masamlp/models/realmlp.py +165 -0
  30. masamlp-0.1.0/src/masamlp/models/resnet.py +57 -0
  31. masamlp-0.1.0/src/masamlp/models/tab_transformer.py +90 -0
  32. masamlp-0.1.0/src/masamlp/models/tabr.py +180 -0
  33. masamlp-0.1.0/src/masamlp/presets.py +93 -0
  34. masamlp-0.1.0/src/masamlp/py.typed +0 -0
  35. masamlp-0.1.0/src/masamlp/regressor.py +158 -0
  36. masamlp-0.1.0/src/masamlp/sklearn.py +361 -0
  37. masamlp-0.1.0/src/masamlp/utils/__init__.py +4 -0
  38. masamlp-0.1.0/src/masamlp/utils/random.py +17 -0
  39. masamlp-0.1.0/src/masamlp/utils/validation.py +46 -0
  40. masamlp-0.1.0/tests/conftest.py +82 -0
  41. masamlp-0.1.0/tests/test_custom_metric.py +55 -0
  42. masamlp-0.1.0/tests/test_custom_objective.py +112 -0
  43. masamlp-0.1.0/tests/test_device.py +80 -0
  44. masamlp-0.1.0/tests/test_embeddings.py +64 -0
  45. masamlp-0.1.0/tests/test_ensemble.py +141 -0
  46. masamlp-0.1.0/tests/test_estimators.py +137 -0
  47. masamlp-0.1.0/tests/test_gandalf_grn.py +108 -0
  48. masamlp-0.1.0/tests/test_ghost_batchnorm.py +84 -0
  49. masamlp-0.1.0/tests/test_metrics.py +64 -0
  50. masamlp-0.1.0/tests/test_models.py +133 -0
  51. masamlp-0.1.0/tests/test_modernnca.py +84 -0
  52. masamlp-0.1.0/tests/test_objectives.py +80 -0
  53. masamlp-0.1.0/tests/test_preprocessing.py +135 -0
  54. masamlp-0.1.0/tests/test_realmlp.py +173 -0
  55. masamlp-0.1.0/tests/test_sample_weight.py +87 -0
  56. masamlp-0.1.0/tests/test_serialization.py +61 -0
  57. masamlp-0.1.0/tests/test_sklearn_compat.py +57 -0
  58. masamlp-0.1.0/tests/test_tabr.py +63 -0
  59. masamlp-0.1.0/tests/test_trainer.py +89 -0
  60. masamlp-0.1.0/tests/test_transformers.py +74 -0
@@ -0,0 +1,12 @@
1
+ .claude/
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .venv/
8
+ .pytest_cache/
9
+ .ruff_cache/
10
+ .coverage
11
+ htmlcov/
12
+ site/
@@ -0,0 +1,46 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0 (2026-07-02)
4
+
5
+ Initial release.
6
+
7
+ - `MasaRegressor` / `MasaClassifier` sklearn-compatible estimators with
8
+ `fit(X, y, sample_weight=..., eval_set=...)`, early stopping on any metric,
9
+ and directory-format save/load.
10
+ - Models: `resnet` and `ft_transformer` (Gorishniy et al. 2021), `realmlp`
11
+ (Holzmüller et al. 2024, TD-S architecture with the full training recipe
12
+ in `masamlp.realmlp_params`), `tab_transformer` (Huang et al. 2020),
13
+ `danet` (Chen et al. AAAI 2022), `tabr` (retrieval-augmented, Gorishniy
14
+ et al. 2023), `modernnca` (Ye et al. 2024, soft-nearest-neighbor),
15
+ `gandalf` (Joseph & Raj 2022, GFLU with t-softmax feature masks),
16
+ `grn` (stacked TFT Gated Residual Networks), and `lnn` (experimental
17
+ CfC-based liquid network for static tabular data), plus a
18
+ `register_model` hook for custom architectures (token-based models via
19
+ `embedding_kind = "tokens"`).
20
+ - `n_ens` seed ensembling on both estimators (pytabkit semantics: members
21
+ seeded `random_state + i`, predictions averaged on the transformed scale);
22
+ save/load stores all members. `ens_mode="vectorized"` trains all members
23
+ in one vmapped forward/backward (torch.func) for BatchNorm-free models,
24
+ with per-member best-epoch tracking.
25
+ - Full RealMLP-TD recipe via `masamlp.realmlp_td_params(task)`: parametric
26
+ activations (`act_lr_factor`), flat_cos-scheduled dropout and weight decay
27
+ (`weight_decay_schedule`, zero decay on biases), PBLD embedding lr factor,
28
+ and `cat_encoding="hybrid"` (one-hot up to 9 categories, embeddings of
29
+ size 8 above).
30
+ - RealMLP insights as composable estimator options: `numeric_scaler="rssc"`,
31
+ `cat_encoding="onehot"`, numeric embedding zoo
32
+ (`num_embedding="pbld"/"plr"/"plr-lite"/"pl"/"periodic"`), learnable input scaling
33
+ (`num_scaling`), `lr_scheduler="coslog4"` with per-group learning-rate
34
+ factors, `optimizer_betas`, and regression `clip_predictions`.
35
+ - Objective plugin system: per-sample torch losses with a uniform
36
+ sample-weight contract; built-ins for regression (squared error, MAE,
37
+ Huber, quantile, Poisson) and classification (binary logistic, multiclass
38
+ softmax, both with label smoothing).
39
+ - Metric plugin system ported from repleafgbm (`get_metric` / `make_metric`).
40
+ - Built-in preprocessing: quantile/standard/robust numeric scaling, median
41
+ imputation, categorical index encoding with embeddings.
42
+ - Device support: CPU, CUDA (bf16 AMP, optional `torch.compile`), and MPS,
43
+ behind `device="auto"`. Verified on Colab T4 (docs/verdicts/).
44
+ - DANet made GPU-practical (KI-009): the grouped 1x1 conv is computed as a
45
+ batched einsum over the same parameters and GhostBatchNorm's training
46
+ path is fused — 50x faster on T4, 14x faster on CPU, bit-for-bit state_dict compatible.
masamlp-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Masaya Kawamata
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
masamlp-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,201 @@
1
+ Metadata-Version: 2.4
2
+ Name: masamlp
3
+ Version: 0.1.0
4
+ Summary: Extensible tabular deep learning: TabularResNet, DANet, and TabularLNN with first-class sample_weight, custom objectives, and custom metrics.
5
+ Project-URL: Homepage, https://github.com/Matapanino/masamlp
6
+ Project-URL: Repository, https://github.com/Matapanino/masamlp
7
+ Author-email: Masaya Kawamata <mkawamata038@gmail.com>
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: danet,deep-learning,machine-learning,pytorch,resnet,tabular
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: numpy>=1.23
18
+ Requires-Dist: pandas>=1.5
19
+ Requires-Dist: scikit-learn>=1.2
20
+ Requires-Dist: torch>=2.1
21
+ Provides-Extra: bench
22
+ Requires-Dist: lightgbm>=4.0; extra == 'bench'
23
+ Requires-Dist: optuna>=3; extra == 'bench'
24
+ Requires-Dist: pytabkit>=1.5; extra == 'bench'
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest-cov>=4; extra == 'dev'
27
+ Requires-Dist: pytest>=7.0; extra == 'dev'
28
+ Requires-Dist: ruff>=0.4; extra == 'dev'
29
+ Provides-Extra: docs
30
+ Requires-Dist: pdoc>=14; extra == 'docs'
31
+ Description-Content-Type: text/markdown
32
+
33
+ # masaMLP
34
+
35
+ ![CI](https://github.com/Matapanino/masamlp/actions/workflows/ci.yml/badge.svg)
36
+ ![PyPI](https://img.shields.io/pypi/v/masamlp.svg)
37
+ ![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)
38
+
39
+ **Extensible tabular deep learning** — TabularResNet, DANet, and TabularLNN
40
+ behind sklearn-compatible estimators with first-class **sample_weight**,
41
+ **custom objectives**, **custom metrics**, and **early stopping on any
42
+ metric**. The sibling library of
43
+ [repleafgbm](https://github.com/Matapanino/repleafgbm) (same author, same API
44
+ philosophy), for the neural side of tabular ML.
45
+
46
+ > **Status: alpha (0.1.x).** Built with heavy use of
47
+ > [Claude Code](https://claude.com/claude-code) (coding and architecture
48
+ > design).
49
+
50
+ ## Why masaMLP
51
+
52
+ Excellent tabular DL libraries exist — [pytabkit](https://github.com/dholzmueller/pytabkit)
53
+ ships state-of-the-art models like RealMLP and TabM, and
54
+ [rtdl](https://github.com/yandex-research/rtdl) provides reference modules.
55
+ What they don't make easy is *extension*: `sample_weight` in `fit`, custom
56
+ training losses, custom evaluation metrics, and early stopping driven by
57
+ them. masaMLP is built around exactly those hooks:
58
+
59
+ - **`fit(X, y, sample_weight=..., eval_set=...)`** — LightGBM-style, sklearn
60
+ compatible. Weights flow through a single reduction
61
+ `(loss * w).sum() / w.sum()` that every objective shares.
62
+ - **Custom objectives** are per-sample torch losses — a plain function (or
63
+ `nn.Module` with trainable parameters). Because the trainer owns the
64
+ weighted reduction, your loss gets correct `sample_weight` and
65
+ `class_weight` handling for free.
66
+ - **Custom metrics** are plain NumPy callables via `make_metric`, and any of
67
+ them (minimize or maximize) can drive early stopping with best-epoch weight
68
+ restoration.
69
+ - **Multiclass, multioutput regression, class_weight, label smoothing**
70
+ supported natively; built-in preprocessing (quantile scaling, missing
71
+ values, categorical embeddings) so DataFrames go straight into `fit`.
72
+ - **CPU / CUDA / MPS** behind `device="auto"`: device-resident tensors with
73
+ no DataLoader overhead, automatic full-batch mode for small data, bf16 AMP
74
+ on CUDA, opt-in `torch.compile` with eager fallback.
75
+
76
+ masaMLP deliberately does *not* try to re-benchmark the field — see
77
+ [docs/attribution.md](docs/attribution.md) for the research and libraries it
78
+ builds on.
79
+
80
+ ## Models
81
+
82
+ | name | source | notes |
83
+ |---|---|---|
84
+ | `resnet` | Gorishniy et al. 2021 (arXiv:2106.11959) | default; strong baseline |
85
+ | `realmlp` | Holzmüller et al. 2024 (arXiv:2407.04491) | RealMLP-TD-S architecture (scaling layer, NTP linear layers, SELU/Mish); pair with `masamlp.realmlp_params(task)` for the full TD-S training recipe |
86
+ | `ft_transformer` | Gorishniy et al. 2021 (arXiv:2106.11959) | feature tokens + [CLS] + PreNorm/ReGLU transformer, per the rtdl reference |
87
+ | `tab_transformer` | Huang et al. 2020 (arXiv:2012.06678) | transformer over categorical tokens; numerics bypass (or embed via `num_embedding`) |
88
+ | `danet` | Chen et al. AAAI 2022 (arXiv:2112.02962) | Abstract Layers with learnable sparse feature groups (in-house entmax15) |
89
+ | `tabr` | Gorishniy et al. 2023 (arXiv:2307.14338) | retrieval-augmented: nearest training rows are aggregated into each prediction |
90
+ | `modernnca` | Ye et al. 2024 (arXiv:2407.03257) | soft-nearest-neighbor aggregation with stochastic candidate sampling; pairs well with `num_embedding="plr-lite"` |
91
+ | `gandalf` | Joseph & Raj 2022 (arXiv:2207.08548) | GFLU stages: learnable sparse feature masks (t-softmax) with GRU-style gating; exposes `feature_importances()` |
92
+ | `grn` | GRN blocks from TFT, Lim et al. 2021 (arXiv:1912.09363) | stack of Gated Residual Networks over embedded features (masaMLP's own composition) |
93
+ | `lnn` | CfC cells, Hasani et al. 2022 | **experimental** liquid-network adaptation for static tabular data — see [docs/lnn.md](docs/lnn.md) |
94
+
95
+ Third-party architectures plug in with `register_model` and get the whole
96
+ estimator surface (weights, objectives, metrics, early stopping) for free.
97
+
98
+ ### RealMLP insights are composable options
99
+
100
+ The tricks from the RealMLP paper are estimator-level options usable with
101
+ *any* model (`lnn` included), not baked into one architecture:
102
+
103
+ - `numeric_scaler="rssc"` — robust scale + smooth clip preprocessing
104
+ - `cat_encoding="onehot"` — RealMLP-style one-hot (binary → ±1, missing → 0)
105
+ - `num_embedding="pbld" | "plr" | "plr-lite" | "pl" | "periodic"` — the
106
+ numeric embedding zoo (arXiv:2203.05556 + PBLD); token models
107
+ (`ft_transformer`, `tab_transformer`) use the same options as feature
108
+ tokenizers
109
+ - `model_params={"num_scaling": True}` — learnable per-feature input scale
110
+ - `lr_scheduler="coslog4"`, `optimizer_betas=(0.9, 0.95)` — the training
111
+ schedule
112
+ - `clip_predictions=True` (regressor) — clip to the observed target range
113
+ - `n_ens=k` — seed ensembling as in pytabkit's RealMLP: k members trained
114
+ with seeds `random_state + i`, predictions averaged on the probability /
115
+ value scale; works with every model including the retrieval ones.
116
+ `ens_mode="vectorized"` trains all members in one vmapped forward/backward
117
+ (`torch.func`) for BatchNorm-free models — pytabkit's speed trick
118
+ - `weight_decay_schedule="flat_cos"` — RealMLP-TD's scheduled weight decay
119
+ (param groups can opt out, e.g. biases)
120
+ - `masamlp.realmlp_td_params(task)` — the **full RealMLP-TD recipe**:
121
+ parametric activations, flat_cos-scheduled dropout and weight decay, PBLD
122
+ embeddings with their own lr factor, and hybrid categorical encoding
123
+ (one-hot ≤ 9 categories, embeddings above)
124
+
125
+ ```python
126
+ from masamlp import MasaClassifier, realmlp_params
127
+
128
+ clf = MasaClassifier(**realmlp_params("classification")) # the TD-S recipe
129
+ clf = MasaClassifier(**{**realmlp_params("classification"),
130
+ "num_embedding": "pbld"}) # toward RealMLP-TD
131
+ ```
132
+
133
+ ## Install
134
+
135
+ ```bash
136
+ pip install masamlp # torch, numpy, pandas, scikit-learn
137
+ ```
138
+
139
+ ## Quickstart
140
+
141
+ ```python
142
+ import numpy as np
143
+ from masamlp import MasaClassifier, make_metric
144
+
145
+ def f1(y_true, y_proba):
146
+ pred = y_proba >= 0.5
147
+ tp = np.sum(pred & (y_true == 1))
148
+ return 2 * tp / max(pred.sum() + (y_true == 1).sum(), 1)
149
+
150
+ clf = MasaClassifier(
151
+ model="resnet",
152
+ eval_metric=make_metric(f1, name="f1", minimize=False),
153
+ early_stopping_rounds=15,
154
+ class_weight="balanced",
155
+ )
156
+ clf.fit(X_train, y_train, sample_weight=w_train, eval_set=[(X_val, y_val)])
157
+ proba = clf.predict_proba(X_test)
158
+ print(clf.best_iteration_, clf.best_score_, clf.evals_result_["valid_0"]["f1"][:3])
159
+ ```
160
+
161
+ Custom objective (regression, asymmetric loss):
162
+
163
+ ```python
164
+ import torch
165
+ from masamlp import MasaRegressor
166
+
167
+ def asymmetric_mse(y_true, raw_pred): # -> per-sample (n,) tensor
168
+ err = raw_pred - y_true # raw_pred: (n, out_dim)
169
+ return torch.where(err < 0, 4.0 * err**2, err**2).mean(dim=1)
170
+
171
+ reg = MasaRegressor(model="danet", objective=asymmetric_mse)
172
+ reg.fit(X, y, sample_weight=w) # weights just work
173
+ ```
174
+
175
+ Save/load is a plain directory (`manifest.json` + tensors, loaded with
176
+ `weights_only=True` — no pickle execution):
177
+
178
+ ```python
179
+ reg.save_model("model_dir")
180
+ reg2 = MasaRegressor.load_model("model_dir")
181
+ ```
182
+
183
+ ## Devices
184
+
185
+ `device="auto"` resolves cuda > mps > cpu. CUDA gets bf16 AMP by default and
186
+ optional `compile=True`; MPS and CPU train in float32. Details and caveats:
187
+ [docs/devices.md](docs/devices.md).
188
+
189
+ ## Development
190
+
191
+ ```bash
192
+ pip install -e ".[dev]"
193
+ bash scripts/check.sh # ruff + pytest + examples/quickstart.py
194
+ ```
195
+
196
+ Development rules live in [CLAUDE.md](CLAUDE.md); roadmap in
197
+ [docs/roadmap.md](docs/roadmap.md).
198
+
199
+ ## License
200
+
201
+ MIT. Architecture attributions: [docs/attribution.md](docs/attribution.md).
@@ -0,0 +1,169 @@
1
+ # masaMLP
2
+
3
+ ![CI](https://github.com/Matapanino/masamlp/actions/workflows/ci.yml/badge.svg)
4
+ ![PyPI](https://img.shields.io/pypi/v/masamlp.svg)
5
+ ![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)
6
+
7
+ **Extensible tabular deep learning** — TabularResNet, DANet, and TabularLNN
8
+ behind sklearn-compatible estimators with first-class **sample_weight**,
9
+ **custom objectives**, **custom metrics**, and **early stopping on any
10
+ metric**. The sibling library of
11
+ [repleafgbm](https://github.com/Matapanino/repleafgbm) (same author, same API
12
+ philosophy), for the neural side of tabular ML.
13
+
14
+ > **Status: alpha (0.1.x).** Built with heavy use of
15
+ > [Claude Code](https://claude.com/claude-code) (coding and architecture
16
+ > design).
17
+
18
+ ## Why masaMLP
19
+
20
+ Excellent tabular DL libraries exist — [pytabkit](https://github.com/dholzmueller/pytabkit)
21
+ ships state-of-the-art models like RealMLP and TabM, and
22
+ [rtdl](https://github.com/yandex-research/rtdl) provides reference modules.
23
+ What they don't make easy is *extension*: `sample_weight` in `fit`, custom
24
+ training losses, custom evaluation metrics, and early stopping driven by
25
+ them. masaMLP is built around exactly those hooks:
26
+
27
+ - **`fit(X, y, sample_weight=..., eval_set=...)`** — LightGBM-style, sklearn
28
+ compatible. Weights flow through a single reduction
29
+ `(loss * w).sum() / w.sum()` that every objective shares.
30
+ - **Custom objectives** are per-sample torch losses — a plain function (or
31
+ `nn.Module` with trainable parameters). Because the trainer owns the
32
+ weighted reduction, your loss gets correct `sample_weight` and
33
+ `class_weight` handling for free.
34
+ - **Custom metrics** are plain NumPy callables via `make_metric`, and any of
35
+ them (minimize or maximize) can drive early stopping with best-epoch weight
36
+ restoration.
37
+ - **Multiclass, multioutput regression, class_weight, label smoothing**
38
+ supported natively; built-in preprocessing (quantile scaling, missing
39
+ values, categorical embeddings) so DataFrames go straight into `fit`.
40
+ - **CPU / CUDA / MPS** behind `device="auto"`: device-resident tensors with
41
+ no DataLoader overhead, automatic full-batch mode for small data, bf16 AMP
42
+ on CUDA, opt-in `torch.compile` with eager fallback.
43
+
44
+ masaMLP deliberately does *not* try to re-benchmark the field — see
45
+ [docs/attribution.md](docs/attribution.md) for the research and libraries it
46
+ builds on.
47
+
48
+ ## Models
49
+
50
+ | name | source | notes |
51
+ |---|---|---|
52
+ | `resnet` | Gorishniy et al. 2021 (arXiv:2106.11959) | default; strong baseline |
53
+ | `realmlp` | Holzmüller et al. 2024 (arXiv:2407.04491) | RealMLP-TD-S architecture (scaling layer, NTP linear layers, SELU/Mish); pair with `masamlp.realmlp_params(task)` for the full TD-S training recipe |
54
+ | `ft_transformer` | Gorishniy et al. 2021 (arXiv:2106.11959) | feature tokens + [CLS] + PreNorm/ReGLU transformer, per the rtdl reference |
55
+ | `tab_transformer` | Huang et al. 2020 (arXiv:2012.06678) | transformer over categorical tokens; numerics bypass (or embed via `num_embedding`) |
56
+ | `danet` | Chen et al. AAAI 2022 (arXiv:2112.02962) | Abstract Layers with learnable sparse feature groups (in-house entmax15) |
57
+ | `tabr` | Gorishniy et al. 2023 (arXiv:2307.14338) | retrieval-augmented: nearest training rows are aggregated into each prediction |
58
+ | `modernnca` | Ye et al. 2024 (arXiv:2407.03257) | soft-nearest-neighbor aggregation with stochastic candidate sampling; pairs well with `num_embedding="plr-lite"` |
59
+ | `gandalf` | Joseph & Raj 2022 (arXiv:2207.08548) | GFLU stages: learnable sparse feature masks (t-softmax) with GRU-style gating; exposes `feature_importances()` |
60
+ | `grn` | GRN blocks from TFT, Lim et al. 2021 (arXiv:1912.09363) | stack of Gated Residual Networks over embedded features (masaMLP's own composition) |
61
+ | `lnn` | CfC cells, Hasani et al. 2022 | **experimental** liquid-network adaptation for static tabular data — see [docs/lnn.md](docs/lnn.md) |
62
+
63
+ Third-party architectures plug in with `register_model` and get the whole
64
+ estimator surface (weights, objectives, metrics, early stopping) for free.
65
+
66
+ ### RealMLP insights are composable options
67
+
68
+ The tricks from the RealMLP paper are estimator-level options usable with
69
+ *any* model (`lnn` included), not baked into one architecture:
70
+
71
+ - `numeric_scaler="rssc"` — robust scale + smooth clip preprocessing
72
+ - `cat_encoding="onehot"` — RealMLP-style one-hot (binary → ±1, missing → 0)
73
+ - `num_embedding="pbld" | "plr" | "plr-lite" | "pl" | "periodic"` — the
74
+ numeric embedding zoo (arXiv:2203.05556 + PBLD); token models
75
+ (`ft_transformer`, `tab_transformer`) use the same options as feature
76
+ tokenizers
77
+ - `model_params={"num_scaling": True}` — learnable per-feature input scale
78
+ - `lr_scheduler="coslog4"`, `optimizer_betas=(0.9, 0.95)` — the training
79
+ schedule
80
+ - `clip_predictions=True` (regressor) — clip to the observed target range
81
+ - `n_ens=k` — seed ensembling as in pytabkit's RealMLP: k members trained
82
+ with seeds `random_state + i`, predictions averaged on the probability /
83
+ value scale; works with every model including the retrieval ones.
84
+ `ens_mode="vectorized"` trains all members in one vmapped forward/backward
85
+ (`torch.func`) for BatchNorm-free models — pytabkit's speed trick
86
+ - `weight_decay_schedule="flat_cos"` — RealMLP-TD's scheduled weight decay
87
+ (param groups can opt out, e.g. biases)
88
+ - `masamlp.realmlp_td_params(task)` — the **full RealMLP-TD recipe**:
89
+ parametric activations, flat_cos-scheduled dropout and weight decay, PBLD
90
+ embeddings with their own lr factor, and hybrid categorical encoding
91
+ (one-hot ≤ 9 categories, embeddings above)
92
+
93
+ ```python
94
+ from masamlp import MasaClassifier, realmlp_params
95
+
96
+ clf = MasaClassifier(**realmlp_params("classification")) # the TD-S recipe
97
+ clf = MasaClassifier(**{**realmlp_params("classification"),
98
+ "num_embedding": "pbld"}) # toward RealMLP-TD
99
+ ```
100
+
101
+ ## Install
102
+
103
+ ```bash
104
+ pip install masamlp # torch, numpy, pandas, scikit-learn
105
+ ```
106
+
107
+ ## Quickstart
108
+
109
+ ```python
110
+ import numpy as np
111
+ from masamlp import MasaClassifier, make_metric
112
+
113
+ def f1(y_true, y_proba):
114
+ pred = y_proba >= 0.5
115
+ tp = np.sum(pred & (y_true == 1))
116
+ return 2 * tp / max(pred.sum() + (y_true == 1).sum(), 1)
117
+
118
+ clf = MasaClassifier(
119
+ model="resnet",
120
+ eval_metric=make_metric(f1, name="f1", minimize=False),
121
+ early_stopping_rounds=15,
122
+ class_weight="balanced",
123
+ )
124
+ clf.fit(X_train, y_train, sample_weight=w_train, eval_set=[(X_val, y_val)])
125
+ proba = clf.predict_proba(X_test)
126
+ print(clf.best_iteration_, clf.best_score_, clf.evals_result_["valid_0"]["f1"][:3])
127
+ ```
128
+
129
+ Custom objective (regression, asymmetric loss):
130
+
131
+ ```python
132
+ import torch
133
+ from masamlp import MasaRegressor
134
+
135
+ def asymmetric_mse(y_true, raw_pred): # -> per-sample (n,) tensor
136
+ err = raw_pred - y_true # raw_pred: (n, out_dim)
137
+ return torch.where(err < 0, 4.0 * err**2, err**2).mean(dim=1)
138
+
139
+ reg = MasaRegressor(model="danet", objective=asymmetric_mse)
140
+ reg.fit(X, y, sample_weight=w) # weights just work
141
+ ```
142
+
143
+ Save/load is a plain directory (`manifest.json` + tensors, loaded with
144
+ `weights_only=True` — no pickle execution):
145
+
146
+ ```python
147
+ reg.save_model("model_dir")
148
+ reg2 = MasaRegressor.load_model("model_dir")
149
+ ```
150
+
151
+ ## Devices
152
+
153
+ `device="auto"` resolves cuda > mps > cpu. CUDA gets bf16 AMP by default and
154
+ optional `compile=True`; MPS and CPU train in float32. Details and caveats:
155
+ [docs/devices.md](docs/devices.md).
156
+
157
+ ## Development
158
+
159
+ ```bash
160
+ pip install -e ".[dev]"
161
+ bash scripts/check.sh # ruff + pytest + examples/quickstart.py
162
+ ```
163
+
164
+ Development rules live in [CLAUDE.md](CLAUDE.md); roadmap in
165
+ [docs/roadmap.md](docs/roadmap.md).
166
+
167
+ ## License
168
+
169
+ MIT. Architecture attributions: [docs/attribution.md](docs/attribution.md).
@@ -0,0 +1,23 @@
1
+ # Benchmarks
2
+
3
+ Not shipped with the package; the library (`src/`) never imports anything
4
+ here.
5
+
6
+ - `parity_realmlp.py` — the honesty check: masamlp's RealMLP-TD-S recipe vs
7
+ the author's standalone reference implementation (vendored under
8
+ `vendor/`, MIT) on california housing and adult, with sklearn's
9
+ HistGradientBoosting as an anchor. Expected outcome: comparable metrics
10
+ (same recipe, different shuffling details), not bitwise equality.
11
+ - `model_zoo.py` — every registered model on the same two datasets with its
12
+ recommended knobs. Single seed, capped epochs, subsampled rows, no HPO:
13
+ a smoke-level leaderboard, not a paper-grade ranking.
14
+
15
+ Run from the repo root:
16
+
17
+ ```bash
18
+ PYTHONPATH=src python3 benchmarks/parity_realmlp.py
19
+ PYTHONPATH=src python3 benchmarks/model_zoo.py
20
+ ```
21
+
22
+ If OpenML downloads fail with SSL errors (framework Python on macOS):
23
+ `export SSL_CERT_FILE=$(python3 -m certifi)`.
@@ -0,0 +1,59 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "masamlp"
7
+ version = "0.1.0"
8
+ description = "Extensible tabular deep learning: TabularResNet, DANet, and TabularLNN with first-class sample_weight, custom objectives, and custom metrics."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Masaya Kawamata", email = "mkawamata038@gmail.com" }]
13
+ keywords = ["tabular", "deep-learning", "pytorch", "machine-learning", "resnet", "danet"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Science/Research",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
20
+ ]
21
+ dependencies = [
22
+ "torch>=2.1",
23
+ "numpy>=1.23",
24
+ "pandas>=1.5",
25
+ "scikit-learn>=1.2",
26
+ ]
27
+
28
+ [project.optional-dependencies]
29
+ # Benchmark tooling only; the library (src/) never imports these.
30
+ bench = ["lightgbm>=4.0", "pytabkit>=1.5", "optuna>=3"]
31
+ # API reference generation (scripts/build_docs.sh).
32
+ docs = ["pdoc>=14"]
33
+ dev = ["pytest>=7.0", "pytest-cov>=4", "ruff>=0.4"]
34
+
35
+ [project.urls]
36
+ Homepage = "https://github.com/Matapanino/masamlp"
37
+ Repository = "https://github.com/Matapanino/masamlp"
38
+
39
+ [tool.hatch.build.targets.wheel]
40
+ packages = ["src/masamlp"]
41
+ # Ship the PEP 561 marker so type checkers honour the inline annotations.
42
+ force-include = { "src/masamlp/py.typed" = "masamlp/py.typed" }
43
+
44
+ [tool.hatch.build.targets.sdist]
45
+ include = ["src/masamlp", "tests", "pyproject.toml", "README.md", "LICENSE", "CHANGELOG.md"]
46
+
47
+ [tool.ruff]
48
+ line-length = 100
49
+ src = ["src", "tests"]
50
+
51
+ [tool.ruff.lint]
52
+ select = ["E", "F", "W", "I", "UP", "B"]
53
+
54
+ [tool.pytest.ini_options]
55
+ testpaths = ["tests"]
56
+ addopts = "-q"
57
+ filterwarnings = [
58
+ "ignore:.*torch.compile.*:UserWarning",
59
+ ]
@@ -0,0 +1,44 @@
1
+ """masaMLP: extensible tabular deep learning.
2
+
3
+ TabularResNet, DANet, and TabularLNN behind sklearn-compatible estimators
4
+ with first-class sample_weight, custom objectives, custom metrics, and early
5
+ stopping on any metric — the sibling library of repleafgbm.
6
+ """
7
+
8
+ from masamlp.classifier import MasaClassifier
9
+ from masamlp.core.metrics import BaseMetric, get_metric, make_metric
10
+ from masamlp.core.objectives import (
11
+ BaseObjective,
12
+ BinaryLogistic,
13
+ Huber,
14
+ MulticlassSoftmax,
15
+ PoissonRegression,
16
+ Quantile,
17
+ get_objective,
18
+ make_objective,
19
+ )
20
+ from masamlp.models import register_model
21
+ from masamlp.presets import realmlp_params, realmlp_td_params
22
+ from masamlp.regressor import MasaRegressor
23
+
24
+ __version__ = "0.1.0"
25
+
26
+ __all__ = [
27
+ "MasaRegressor",
28
+ "MasaClassifier",
29
+ "BaseMetric",
30
+ "get_metric",
31
+ "make_metric",
32
+ "BaseObjective",
33
+ "get_objective",
34
+ "make_objective",
35
+ "Huber",
36
+ "Quantile",
37
+ "PoissonRegression",
38
+ "BinaryLogistic",
39
+ "MulticlassSoftmax",
40
+ "register_model",
41
+ "realmlp_params",
42
+ "realmlp_td_params",
43
+ "__version__",
44
+ ]