autogluon.tabular 1.2.1b20250311__py3-none-any.whl → 1.2.1b20250312__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/tabular/configs/hyperparameter_configs.py +0 -1
- autogluon/tabular/models/__init__.py +0 -1
- autogluon/tabular/register/_ag_model_register.py +0 -2
- autogluon/tabular/version.py +1 -1
- {autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/METADATA +9 -13
- {autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/RECORD +13 -16
- autogluon/tabular/models/vowpalwabbit/__init__.py +0 -0
- autogluon/tabular/models/vowpalwabbit/vowpalwabbit_model.py +0 -286
- autogluon/tabular/models/vowpalwabbit/vowpalwabbit_utils.py +0 -93
- /autogluon.tabular-1.2.1b20250311-py3.9-nspkg.pth → /autogluon.tabular-1.2.1b20250312-py3.9-nspkg.pth +0 -0
- {autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/LICENSE +0 -0
- {autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/NOTICE +0 -0
- {autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/WHEEL +0 -0
- {autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/namespace_packages.txt +0 -0
- {autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/top_level.txt +0 -0
- {autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/zip-safe +0 -0
@@ -107,7 +107,6 @@ hyperparameter_config_dict = dict(
|
|
107
107
|
"XGB": {},
|
108
108
|
# 'FASTAI': {}, # FastAI gets killed if the dataset is large (400K rows).
|
109
109
|
"AG_AUTOMM": {},
|
110
|
-
"VW": {},
|
111
110
|
},
|
112
111
|
# Hyperparameters intended to find an interpretable model which doesn't sacrifice predictive accuracy
|
113
112
|
interpretable={
|
@@ -22,6 +22,5 @@ from .tabpfn.tabpfn_model import TabPFNModel
|
|
22
22
|
from .tabpfnmix.tabpfnmix_model import TabPFNMixModel
|
23
23
|
from .tabular_nn.torch.tabular_nn_torch import TabularNeuralNetTorchModel
|
24
24
|
from .text_prediction.text_prediction_v1_model import TextPredictorModel
|
25
|
-
from .vowpalwabbit.vowpalwabbit_model import VowpalWabbitModel
|
26
25
|
from .xgboost.xgboost_model import XGBoostModel
|
27
26
|
from .xt.xt_model import XTModel
|
@@ -25,7 +25,6 @@ from ..models import (
|
|
25
25
|
TabPFNModel,
|
26
26
|
TabularNeuralNetTorchModel,
|
27
27
|
TextPredictorModel,
|
28
|
-
VowpalWabbitModel,
|
29
28
|
XGBoostModel,
|
30
29
|
XTModel,
|
31
30
|
)
|
@@ -51,7 +50,6 @@ REGISTERED_MODEL_CLS_LST = [
|
|
51
50
|
TabPFNModel,
|
52
51
|
TabPFNMixModel,
|
53
52
|
FastTextModel,
|
54
|
-
VowpalWabbitModel,
|
55
53
|
GreedyWeightedEnsembleModel,
|
56
54
|
SimpleWeightedEnsembleModel,
|
57
55
|
RuleFitModel,
|
autogluon/tabular/version.py
CHANGED
{autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: autogluon.tabular
|
3
|
-
Version: 1.2.1b20250311
|
3
|
+
Version: 1.2.1b20250312
|
4
4
|
Summary: Fast and Accurate ML in 3 Lines of Code
|
5
5
|
Home-page: https://github.com/autogluon/autogluon
|
6
6
|
Author: AutoGluon Community
|
@@ -41,18 +41,18 @@ Requires-Dist: scipy<1.16,>=1.5.4
|
|
41
41
|
Requires-Dist: pandas<2.3.0,>=2.0.0
|
42
42
|
Requires-Dist: scikit-learn<1.5.3,>=1.4.0
|
43
43
|
Requires-Dist: networkx<4,>=3.0
|
44
|
-
Requires-Dist: autogluon.core==1.2.1b20250311
|
45
|
-
Requires-Dist: autogluon.features==1.2.1b20250311
|
44
|
+
Requires-Dist: autogluon.core==1.2.1b20250312
|
45
|
+
Requires-Dist: autogluon.features==1.2.1b20250312
|
46
46
|
Provides-Extra: all
|
47
47
|
Requires-Dist: xgboost<2.2,>=2.0; extra == "all"
|
48
|
-
Requires-Dist: autogluon.core[all]==1.2.1b20250311; extra == "all"
|
48
|
+
Requires-Dist: torch<2.6,>=2.2; extra == "all"
|
49
|
+
Requires-Dist: catboost<1.3,>=1.2; extra == "all"
|
50
|
+
Requires-Dist: einops<0.9,>=0.7; extra == "all"
|
51
|
+
Requires-Dist: fastai<2.8,>=2.3.1; extra == "all"
|
49
52
|
Requires-Dist: spacy<3.8; extra == "all"
|
53
|
+
Requires-Dist: autogluon.core[all]==1.2.1b20250312; extra == "all"
|
50
54
|
Requires-Dist: huggingface-hub[torch]; extra == "all"
|
51
|
-
Requires-Dist: fastai<2.8,>=2.3.1; extra == "all"
|
52
|
-
Requires-Dist: einops<0.9,>=0.7; extra == "all"
|
53
55
|
Requires-Dist: numpy<2.0.0,>=1.25; extra == "all"
|
54
|
-
Requires-Dist: torch<2.6,>=2.2; extra == "all"
|
55
|
-
Requires-Dist: catboost<1.3,>=1.2; extra == "all"
|
56
56
|
Requires-Dist: lightgbm<4.7,>=4.0; extra == "all"
|
57
57
|
Provides-Extra: catboost
|
58
58
|
Requires-Dist: numpy<2.0.0,>=1.25; extra == "catboost"
|
@@ -66,7 +66,7 @@ Requires-Dist: imodels<1.4.0,>=1.3.10; extra == "imodels"
|
|
66
66
|
Provides-Extra: lightgbm
|
67
67
|
Requires-Dist: lightgbm<4.7,>=4.0; extra == "lightgbm"
|
68
68
|
Provides-Extra: ray
|
69
|
-
Requires-Dist: autogluon.core[all]==1.2.1b20250311; extra == "ray"
|
69
|
+
Requires-Dist: autogluon.core[all]==1.2.1b20250312; extra == "ray"
|
70
70
|
Provides-Extra: skex
|
71
71
|
Requires-Dist: scikit-learn-intelex<2025.1,>=2024.0; extra == "skex"
|
72
72
|
Provides-Extra: skl2onnx
|
@@ -82,7 +82,6 @@ Requires-Dist: torch<2.6,>=2.2; extra == "tabpfnmix"
|
|
82
82
|
Requires-Dist: huggingface-hub[torch]; extra == "tabpfnmix"
|
83
83
|
Requires-Dist: einops<0.9,>=0.7; extra == "tabpfnmix"
|
84
84
|
Provides-Extra: tests
|
85
|
-
Requires-Dist: tabpfn<0.2,>=0.1.11; extra == "tests"
|
86
85
|
Requires-Dist: torch<2.6,>=2.2; extra == "tests"
|
87
86
|
Requires-Dist: huggingface-hub[torch]; extra == "tests"
|
88
87
|
Requires-Dist: einops<0.9,>=0.7; extra == "tests"
|
@@ -92,9 +91,6 @@ Requires-Dist: onnxruntime<1.20.0,>=1.17.0; extra == "tests"
|
|
92
91
|
Requires-Dist: onnxruntime-gpu<1.20.0,>=1.17.0; extra == "tests"
|
93
92
|
Requires-Dist: onnx<1.18.0,>=1.13.0; platform_system != "Windows" and extra == "tests"
|
94
93
|
Requires-Dist: onnx<1.16.2,>=1.13.0; platform_system == "Windows" and extra == "tests"
|
95
|
-
Requires-Dist: vowpalwabbit<9.10,>=9; (python_version < "3.11" and sys_platform != "darwin") and extra == "tests"
|
96
|
-
Provides-Extra: vowpalwabbit
|
97
|
-
Requires-Dist: vowpalwabbit<9.10,>=9; (python_version < "3.11" and sys_platform != "darwin") and extra == "vowpalwabbit"
|
98
94
|
Provides-Extra: xgboost
|
99
95
|
Requires-Dist: xgboost<2.2,>=2.0; extra == "xgboost"
|
100
96
|
|
{autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/RECORD
RENAMED
@@ -1,10 +1,10 @@
|
|
1
|
-
autogluon.tabular-1.2.
|
1
|
+
autogluon.tabular-1.2.1b20250312-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
|
2
2
|
autogluon/tabular/__init__.py,sha256=2OXpJCvENRHubBTYNIPpHX93WWuFZzsJBtTZbNVHVas,400
|
3
|
-
autogluon/tabular/version.py,sha256=
|
3
|
+
autogluon/tabular/version.py,sha256=jJrTQDH93aen31K7aA8rJsXAtKx1fUM2eMDvRjqIPVs,91
|
4
4
|
autogluon/tabular/configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
autogluon/tabular/configs/config_helper.py,sha256=Pb2aW9Z9w77pYKPRVZ3nBzHY3KJaiEJSJ747zZcJIVk,21132
|
6
6
|
autogluon/tabular/configs/feature_generator_presets.py,sha256=EV5Ym8VW15q92MwOUpTi7wZFS2QooM51fLg3RdUsn-M,1223
|
7
|
-
autogluon/tabular/configs/hyperparameter_configs.py,sha256=
|
7
|
+
autogluon/tabular/configs/hyperparameter_configs.py,sha256=hp8J7g5GY3Couz929f1ItawobCw-isLTZJBcLoJY348,18035
|
8
8
|
autogluon/tabular/configs/presets_configs.py,sha256=2Jlq1X9sVmVlyUxWsZpDV7ma2TncH5Y2HXDML7x2gYc,6810
|
9
9
|
autogluon/tabular/configs/zeroshot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py,sha256=oKO_2nEpI_EiLaUGmNN-3kPBIp5ATndbCOaVZ1m0048,29911
|
@@ -16,7 +16,7 @@ autogluon/tabular/experimental/plot_leaderboard.py,sha256=BN_kB-zmOZNUYWyI7z9pF6
|
|
16
16
|
autogluon/tabular/learner/__init__.py,sha256=Hhmk5WpKQHohVmI-veOaKMelKJpIdzeXrmw_DPn3DTU,63
|
17
17
|
autogluon/tabular/learner/abstract_learner.py,sha256=0kf0huvg0nphe-lrdKtNTzdIFr14jzJPsfZDRBkKo3g,55253
|
18
18
|
autogluon/tabular/learner/default_learner.py,sha256=hjdKbcFtIQxQ3-k1LiGOo-w5sLxIIQAyFLs3-R35aw0,24781
|
19
|
-
autogluon/tabular/models/__init__.py,sha256=
|
19
|
+
autogluon/tabular/models/__init__.py,sha256=fZDKUKiD9hDzEyFXXbt7_b4yADK9peREdP8QoukWukQ,1036
|
20
20
|
autogluon/tabular/models/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
21
|
autogluon/tabular/models/_utils/rapids_utils.py,sha256=gbej9Hjn4alCWZuGN9sOLXMMAyWbgHPThTsp2feS39o,1038
|
22
22
|
autogluon/tabular/models/_utils/torch_utils.py,sha256=dxs_KMMAOmNkRNjYf_hrzqaHIfkqn1xoKRKqCFbQ1Rk,537
|
@@ -130,9 +130,6 @@ autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py,sha256=ypXqtxdt1q
|
|
130
130
|
autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py,sha256=tttzR5EtYcFa6sIrUG9wyegdYmYE5DPK_CiLF1-L3c8,2875
|
131
131
|
autogluon/tabular/models/text_prediction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
132
132
|
autogluon/tabular/models/text_prediction/text_prediction_v1_model.py,sha256=PBN7F98qgEAO6U76rV_hxZfAmKr_XpVKjElOdBvfX8c,1090
|
133
|
-
autogluon/tabular/models/vowpalwabbit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
134
|
-
autogluon/tabular/models/vowpalwabbit/vowpalwabbit_model.py,sha256=h_j33hnsuxDM0mfExpmejO2pu5lIpTm1uIEirS2OXXI,11802
|
135
|
-
autogluon/tabular/models/vowpalwabbit/vowpalwabbit_utils.py,sha256=jZ0STjvqwKw8jJDeoo5yAXTvgwFvY8Fsz6OqSif_JGI,3677
|
136
133
|
autogluon/tabular/models/xgboost/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
137
134
|
autogluon/tabular/models/xgboost/callbacks.py,sha256=uynimXya07XQMBkDvec-7mXK6OfMGP6M8MiVYu8OVRI,7008
|
138
135
|
autogluon/tabular/models/xgboost/xgboost_model.py,sha256=tTSnTzEot2JB0qEvhki4h3RdaLjEpfMs-jWKsxlJWO4,14304
|
@@ -146,7 +143,7 @@ autogluon/tabular/predictor/__init__.py,sha256=zCMgjxQlWpDWnr1l1xjBCiK3rWC3N3RoD
|
|
146
143
|
autogluon/tabular/predictor/interpretable_predictor.py,sha256=5UeKgnMFsfY65tiO3kxfHBPr03lyswLrgdtjPhI0Y7Q,6934
|
147
144
|
autogluon/tabular/predictor/predictor.py,sha256=jOkpypHAPrL2nsI4iypVkZV90TpMORK-G_Ixr3Kw3XQ,357182
|
148
145
|
autogluon/tabular/register/__init__.py,sha256=7CLOTWIUho0wi4eAwhYJ5Y0PfvNCWKnRwlw3bwYoTNE,93
|
149
|
-
autogluon/tabular/register/_ag_model_register.py,sha256=
|
146
|
+
autogluon/tabular/register/_ag_model_register.py,sha256=JNnmL6cwL_zvObRmyuRbwYsCxDT-qrVF5PY8dzJ5U9k,1518
|
150
147
|
autogluon/tabular/register/_model_register.py,sha256=jqSg0d89dXAAcp-OT4II90ce994ByKMMzAYmpkyaRbI,6824
|
151
148
|
autogluon/tabular/testing/__init__.py,sha256=XrEGLmMdmRT6QHNR13M9wna57LO4O3Q4tt27Ca8omAc,79
|
152
149
|
autogluon/tabular/testing/fit_helper.py,sha256=gVHTdAsp_lSZ_qbwjXM7aA5fI32zHj3_zXwEXC9C_ds,19586
|
@@ -160,11 +157,11 @@ autogluon/tabular/trainer/model_presets/presets.py,sha256=bTPGPyz07a7GG6327yO6ry
|
|
160
157
|
autogluon/tabular/trainer/model_presets/presets_distill.py,sha256=MnFC2GJc6RmDBNAGbsO2XMfo3PjR8cUrZoilWW8gTYQ,3295
|
161
158
|
autogluon/tabular/tuning/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
162
159
|
autogluon/tabular/tuning/feature_pruner.py,sha256=9iNku8gVbYEkjuKlyITPJDicsNkoraaQOlINQq9iZlQ,6877
|
163
|
-
autogluon.tabular-1.2.
|
164
|
-
autogluon.tabular-1.2.
|
165
|
-
autogluon.tabular-1.2.
|
166
|
-
autogluon.tabular-1.2.
|
167
|
-
autogluon.tabular-1.2.
|
168
|
-
autogluon.tabular-1.2.
|
169
|
-
autogluon.tabular-1.2.
|
170
|
-
autogluon.tabular-1.2.
|
160
|
+
autogluon.tabular-1.2.1b20250312.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
|
161
|
+
autogluon.tabular-1.2.1b20250312.dist-info/METADATA,sha256=er3--iygQUQAhMpOQ3Pd_WJnlXRKkGr7go0ueJRdfrA,14069
|
162
|
+
autogluon.tabular-1.2.1b20250312.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
|
163
|
+
autogluon.tabular-1.2.1b20250312.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
164
|
+
autogluon.tabular-1.2.1b20250312.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
|
165
|
+
autogluon.tabular-1.2.1b20250312.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
|
166
|
+
autogluon.tabular-1.2.1b20250312.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
167
|
+
autogluon.tabular-1.2.1b20250312.dist-info/RECORD,,
|
File without changes
|
@@ -1,286 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
import logging
|
4
|
-
import os
|
5
|
-
import time
|
6
|
-
|
7
|
-
import numpy as np
|
8
|
-
import pandas as pd
|
9
|
-
|
10
|
-
from autogluon.common.features.types import (
|
11
|
-
R_CATEGORY,
|
12
|
-
R_FLOAT,
|
13
|
-
R_INT,
|
14
|
-
R_OBJECT,
|
15
|
-
S_IMAGE_PATH,
|
16
|
-
S_TEXT_AS_CATEGORY,
|
17
|
-
S_TEXT_NGRAM,
|
18
|
-
S_TEXT_SPECIAL,
|
19
|
-
)
|
20
|
-
from autogluon.common.utils.try_import import try_import_vowpalwabbit
|
21
|
-
from autogluon.core.constants import (
|
22
|
-
BINARY,
|
23
|
-
MULTICLASS,
|
24
|
-
PROBLEM_TYPES_CLASSIFICATION,
|
25
|
-
PROBLEM_TYPES_REGRESSION,
|
26
|
-
REGRESSION,
|
27
|
-
)
|
28
|
-
from autogluon.core.models import AbstractModel
|
29
|
-
from autogluon.core.utils.exceptions import TimeLimitExceeded
|
30
|
-
|
31
|
-
from .vowpalwabbit_utils import VWFeaturesConverter
|
32
|
-
|
33
|
-
logger = logging.getLogger(__name__)
|
34
|
-
|
35
|
-
|
36
|
-
class VowpalWabbitModel(AbstractModel):
|
37
|
-
"""
|
38
|
-
VowpalWabbit Model: https://vowpalwabbit.org/
|
39
|
-
|
40
|
-
VowpalWabbit Command Line args: https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Command-line-arguments
|
41
|
-
|
42
|
-
"""
|
43
|
-
ag_key = "VW"
|
44
|
-
ag_name = "VowpalWabbit"
|
45
|
-
ag_priority = 10
|
46
|
-
|
47
|
-
model_internals_file_name = "model-internals.pkl"
|
48
|
-
|
49
|
-
# Ref: https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Loss-functions
|
50
|
-
CLASSIFICATION_LOSS_FUNCTIONS = ["logistic", "hinge"]
|
51
|
-
REGRESSION_LOSS_FUNCTIONS = ["squared", "quantile", "poisson", "classic"]
|
52
|
-
|
53
|
-
def __init__(self, **kwargs):
|
54
|
-
super().__init__(**kwargs)
|
55
|
-
self._load_model = None # Used for saving and loading internal model file
|
56
|
-
|
57
|
-
# The `_preprocess` method takes the input data and transforms it to the internal representation usable by the model.
|
58
|
-
# `_preprocess` is called by `preprocess` and is used during model fit and model inference.
|
59
|
-
def _preprocess(self, X: pd.DataFrame, is_train=False, **kwargs) -> pd.Series:
|
60
|
-
X = super()._preprocess(X, **kwargs)
|
61
|
-
if is_train:
|
62
|
-
self._features_converter = VWFeaturesConverter()
|
63
|
-
self._feature_metadata_dict = self._feature_metadata.to_dict()
|
64
|
-
# self._feature_metadata contains the information related to features metadata.
|
65
|
-
X_series = self._features_converter.convert_features_to_vw_format(X, self._feature_metadata_dict)
|
66
|
-
return X_series
|
67
|
-
|
68
|
-
# The `_fit` method takes the input training data (and optionally the validation data) and trains the model.
|
69
|
-
def _fit(
|
70
|
-
self,
|
71
|
-
X: pd.DataFrame,
|
72
|
-
y: pd.Series,
|
73
|
-
time_limit=None,
|
74
|
-
verbosity=2,
|
75
|
-
**kwargs, # training data # training labels
|
76
|
-
): # kwargs includes many other potential inputs, refer to AbstractModel documentation for details
|
77
|
-
time_start = time.time()
|
78
|
-
try_import_vowpalwabbit()
|
79
|
-
import vowpalwabbit
|
80
|
-
|
81
|
-
seed = 0 # Random seed
|
82
|
-
|
83
|
-
# Valid self.problem_type values include ['binary', 'multiclass', 'regression', 'quantile', 'softclass']
|
84
|
-
if self.problem_type not in PROBLEM_TYPES_REGRESSION + PROBLEM_TYPES_CLASSIFICATION:
|
85
|
-
raise TypeError(f"Vowpal Wabbit does not support {self.problem_type}")
|
86
|
-
|
87
|
-
# Certain parameters like passes are passed as hyperparameters but are not used
|
88
|
-
# while initialising the model.
|
89
|
-
# passes: Used as epochs
|
90
|
-
|
91
|
-
params = self._get_model_params()
|
92
|
-
params["loss_function"] = params.get("loss_function", self._get_default_loss_function())
|
93
|
-
passes = params.pop("passes")
|
94
|
-
|
95
|
-
# Make sure to call preprocess on X near the start of `_fit`.
|
96
|
-
# This is necessary because the data is converted via preprocess during predict, and needs to be in the same format as during fit.
|
97
|
-
X_series = self.preprocess(X, is_train=True)
|
98
|
-
|
99
|
-
self._validate_loss_function(loss_function=params["loss_function"])
|
100
|
-
|
101
|
-
# VW expects label from 1 to N for Binary and Multiclass classification problems
|
102
|
-
# AutoGluon does label encoding from 0 to N-1, hence we increment the value of y by 1
|
103
|
-
if self.problem_type != REGRESSION:
|
104
|
-
y = y.apply(lambda row: row + 1)
|
105
|
-
y = y.astype(str) + " "
|
106
|
-
|
107
|
-
# Concatenate y and X to get the training data in VW format
|
108
|
-
final_training_data = y + X_series
|
109
|
-
final_training_data = final_training_data.tolist()
|
110
|
-
|
111
|
-
extra_params = {
|
112
|
-
"cache_file": "train.cache",
|
113
|
-
"holdout_off": True,
|
114
|
-
}
|
115
|
-
|
116
|
-
if verbosity <= 3:
|
117
|
-
extra_params["quiet"] = True
|
118
|
-
|
119
|
-
# Initialize the model
|
120
|
-
if self.problem_type in PROBLEM_TYPES_CLASSIFICATION:
|
121
|
-
# Ref: https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Predicting-probabilities#multi-class---oaa
|
122
|
-
extra_params["oaa"] = self.num_classes
|
123
|
-
extra_params["probabilities"] = True
|
124
|
-
self.model = vowpalwabbit.Workspace(**params, **extra_params)
|
125
|
-
|
126
|
-
time_start_fit = time.time()
|
127
|
-
if time_limit is not None:
|
128
|
-
time_limit_fit = time_limit - (time_start_fit - time_start) - 0.3 # Account for 0.3s overhead
|
129
|
-
if time_limit_fit <= 0:
|
130
|
-
raise TimeLimitExceeded
|
131
|
-
else:
|
132
|
-
time_limit_fit = None
|
133
|
-
|
134
|
-
# Train the model
|
135
|
-
np.random.seed(seed)
|
136
|
-
epoch = 0
|
137
|
-
|
138
|
-
for epoch in range(1, passes + 1):
|
139
|
-
# TODO: Add Early Stopping support via validation
|
140
|
-
self._train_single_epoch(training_data=final_training_data)
|
141
|
-
if time_limit_fit is not None and epoch < passes:
|
142
|
-
time_fit_used = time.time() - time_start_fit
|
143
|
-
time_fit_used_per_epoch = time_fit_used / epoch
|
144
|
-
time_left = time_limit_fit - time_fit_used
|
145
|
-
if time_left <= (time_fit_used_per_epoch * 2):
|
146
|
-
logger.log(30, f"\tEarly stopping due to lack of time. Fit {epoch}/{passes} passes...")
|
147
|
-
break
|
148
|
-
|
149
|
-
self.params_trained["passes"] = epoch
|
150
|
-
|
151
|
-
def _train_single_epoch(self, training_data):
|
152
|
-
row_order = np.arange(0, len(training_data))
|
153
|
-
row_order = np.random.permutation(row_order)
|
154
|
-
for row_i in row_order:
|
155
|
-
row = training_data[row_i]
|
156
|
-
self.model.learn(row)
|
157
|
-
|
158
|
-
def _validate_loss_function(self, loss_function):
|
159
|
-
# Ref: https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Loss-functions
|
160
|
-
if loss_function:
|
161
|
-
if self.problem_type in PROBLEM_TYPES_CLASSIFICATION:
|
162
|
-
assert loss_function in self.CLASSIFICATION_LOSS_FUNCTIONS, (
|
163
|
-
f"For {self.problem_type} problem, VW supports: {self.CLASSIFICATION_LOSS_FUNCTIONS}. " f"Got loss_function:{loss_function}"
|
164
|
-
)
|
165
|
-
elif self.problem_type in PROBLEM_TYPES_REGRESSION:
|
166
|
-
assert loss_function in self.REGRESSION_LOSS_FUNCTIONS, (
|
167
|
-
f"For {self.problem_type} problem, VW supports: {self.REGRESSION_LOSS_FUNCTIONS}. " f"Got loss_function:{loss_function}"
|
168
|
-
)
|
169
|
-
|
170
|
-
def _get_default_loss_function(self) -> str:
|
171
|
-
# Ref: https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Loss-functions
|
172
|
-
if self.problem_type in PROBLEM_TYPES_CLASSIFICATION:
|
173
|
-
return "logistic"
|
174
|
-
else:
|
175
|
-
return "squared"
|
176
|
-
|
177
|
-
def save(self, path: str = None, verbose=True) -> str:
|
178
|
-
"""
|
179
|
-
AutoGluon by default saves the complete Abstract Model in a pickle file format.
|
180
|
-
This includes the internal self.model which is the actual model.
|
181
|
-
However, saving VW model in pickle is not possible.
|
182
|
-
Hence, we dump the Abstract Model by setting setting self.model as None
|
183
|
-
and save self.model as a separate internal file using that model's saving mechanism
|
184
|
-
|
185
|
-
:param path: path where model is to be saved
|
186
|
-
:param verbose: verbosity
|
187
|
-
:return: path where model is saved
|
188
|
-
"""
|
189
|
-
|
190
|
-
self._load_model = self.model is not None
|
191
|
-
__model = self.model
|
192
|
-
self.model = None
|
193
|
-
path = super().save(path=path, verbose=verbose)
|
194
|
-
self.model = __model
|
195
|
-
# Export model
|
196
|
-
if self._load_model:
|
197
|
-
file_path = os.path.join(path, self.model_internals_file_name)
|
198
|
-
self.model.save(file_path)
|
199
|
-
self._load_model = None
|
200
|
-
return path
|
201
|
-
|
202
|
-
@classmethod
|
203
|
-
def load(cls, path: str, reset_paths=True, verbose=True):
|
204
|
-
"""
|
205
|
-
There are two files which needs to be loaded.
|
206
|
-
First is the Abstract Model pickle dump and second is the internal model file.
|
207
|
-
For VW, based on different problem_type/hyperparams, loading arguments will be different
|
208
|
-
"""
|
209
|
-
try_import_vowpalwabbit()
|
210
|
-
import vowpalwabbit
|
211
|
-
|
212
|
-
# Load Abstract Model. This is without the internal model
|
213
|
-
model = super().load(path, reset_paths=reset_paths, verbose=verbose)
|
214
|
-
params = model._get_model_params()
|
215
|
-
# Load the internal model file
|
216
|
-
if model._load_model:
|
217
|
-
file_path = os.path.join(path, cls.model_internals_file_name)
|
218
|
-
|
219
|
-
model_load_params = f" -i {file_path} --quiet"
|
220
|
-
if model.problem_type in PROBLEM_TYPES_CLASSIFICATION:
|
221
|
-
model_load_params += " --probabilities --loss_function=logistic"
|
222
|
-
if params["sparse_weights"]:
|
223
|
-
model_load_params += " --sparse_weights"
|
224
|
-
|
225
|
-
model.model = vowpalwabbit.Workspace(model_load_params)
|
226
|
-
model._load_model = None
|
227
|
-
return model
|
228
|
-
|
229
|
-
def _predict_proba(self, X, **kwargs):
|
230
|
-
# Preprocess the set of X features
|
231
|
-
X = self.preprocess(X, **kwargs)
|
232
|
-
|
233
|
-
y_pred_proba = np.array([self.model.predict(row) for row in X])
|
234
|
-
return self._convert_proba_to_unified_form(y_pred_proba)
|
235
|
-
|
236
|
-
def _get_memory_size(self) -> int:
|
237
|
-
# TODO: Can be improved further to make it more accurate
|
238
|
-
# Returning 5MB as the value
|
239
|
-
return int(5e6)
|
240
|
-
|
241
|
-
# The `_set_default_params` method defines the default hyperparameters of the model.
|
242
|
-
# User-specified parameters will override these values on a key-by-key basis.
|
243
|
-
def _set_default_params(self):
|
244
|
-
default_params = {
|
245
|
-
"passes": 10, # TODO: Much better if 500+, revisit this if wanting to use VW to get strong results
|
246
|
-
"bit_precision": 32,
|
247
|
-
"ngram": 2,
|
248
|
-
"skips": 1,
|
249
|
-
"learning_rate": 1,
|
250
|
-
"sparse_weights": True,
|
251
|
-
}
|
252
|
-
for param, val in default_params.items():
|
253
|
-
self._set_default_param_value(param, val)
|
254
|
-
|
255
|
-
# The `_get_default_auxiliary_params` method defines various model-agnostic parameters such as maximum memory usage and valid input column dtypes.
|
256
|
-
# For most users who build custom models, they will only need to specify the valid/invalid dtypes to the model here.
|
257
|
-
def _get_default_auxiliary_params(self) -> dict:
|
258
|
-
default_auxiliary_params = super()._get_default_auxiliary_params()
|
259
|
-
# Ignore the below mentioned special types. Only those features that are not of the below mentioned
|
260
|
-
# type are passed to the model for training list are passed features
|
261
|
-
extra_auxiliary_params = dict(
|
262
|
-
valid_raw_types=[R_INT, R_FLOAT, R_CATEGORY, R_OBJECT], ignored_type_group_special=[S_IMAGE_PATH, S_TEXT_NGRAM, S_TEXT_AS_CATEGORY, S_TEXT_SPECIAL]
|
263
|
-
)
|
264
|
-
default_auxiliary_params.update(extra_auxiliary_params)
|
265
|
-
return default_auxiliary_params
|
266
|
-
|
267
|
-
@classmethod
|
268
|
-
def _get_default_ag_args(cls) -> dict:
|
269
|
-
default_ag_args = super()._get_default_ag_args()
|
270
|
-
extra_ag_args = {
|
271
|
-
"valid_stacker": False,
|
272
|
-
}
|
273
|
-
default_ag_args.update(extra_ag_args)
|
274
|
-
return default_ag_args
|
275
|
-
|
276
|
-
@classmethod
|
277
|
-
def supported_problem_types(cls) -> list[str] | None:
|
278
|
-
return ["binary", "multiclass", "regression"]
|
279
|
-
|
280
|
-
def _more_tags(self):
|
281
|
-
# `can_refit_full=True` because best epoch is communicated at end of `_fit`: `self.params_trained['passes'] = epoch`
|
282
|
-
return {"can_refit_full": True}
|
283
|
-
|
284
|
-
@classmethod
|
285
|
-
def _class_tags(cls):
|
286
|
-
return {"handles_text": True}
|
@@ -1,93 +0,0 @@
|
|
1
|
-
import pandas as pd
|
2
|
-
|
3
|
-
from autogluon.common.features.types import R_CATEGORY, R_FLOAT, R_INT, S_TEXT
|
4
|
-
|
5
|
-
|
6
|
-
class VWFeaturesConverter:
|
7
|
-
"""
|
8
|
-
Converts features in PandasDataFrame to VW format
|
9
|
-
Ref: https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Input-format
|
10
|
-
"""
|
11
|
-
|
12
|
-
PIPE = "|"
|
13
|
-
SPACE = " "
|
14
|
-
|
15
|
-
# TODO: Add support for different namespaces
|
16
|
-
|
17
|
-
def convert_features_to_vw_format(self, X, feature_metadata) -> pd.Series:
|
18
|
-
"""
|
19
|
-
Converts features to VW format.
|
20
|
-
:param X: features
|
21
|
-
:param feature_metadata: schema of X
|
22
|
-
:return: Returns a series of features converted to VW format
|
23
|
-
"""
|
24
|
-
|
25
|
-
X_out: pd.Series = None
|
26
|
-
|
27
|
-
for feature in feature_metadata:
|
28
|
-
raw_feature, special_feature = feature_metadata[feature]
|
29
|
-
if X_out is None:
|
30
|
-
X_out = (
|
31
|
-
self.PIPE
|
32
|
-
+ self.SPACE
|
33
|
-
+ self.__generate_namespace_based_on_ml_type(X[feature], raw_feature, special_feature, feature).astype("str")
|
34
|
-
+ self.SPACE
|
35
|
-
)
|
36
|
-
else:
|
37
|
-
X_out += (
|
38
|
-
"" + self.SPACE + self.__generate_namespace_based_on_ml_type(X[feature], raw_feature, special_feature, feature).astype("str") + self.SPACE
|
39
|
-
)
|
40
|
-
return X_out
|
41
|
-
|
42
|
-
def __generate_namespace_based_on_ml_type(self, input_series, raw_feature, special_feature, feature_name=None):
|
43
|
-
"""
|
44
|
-
Based on the type of feature, preprocess/sanify these features so that it is in VW format
|
45
|
-
Only use raw text, numeric integer, numeric decimals, and category
|
46
|
-
Ref: https://github.com/autogluon/autogluon/blob/master/common/src/autogluon/common/features/types.py
|
47
|
-
|
48
|
-
:param input_series: A single feature as Pandas Series
|
49
|
-
:param raw_feature: Raw feature Type
|
50
|
-
:param special_feature: Special Feature Type
|
51
|
-
:param feature_name: Column Name of this feature
|
52
|
-
:return: Preprocessed Feature as a Pandas Series
|
53
|
-
"""
|
54
|
-
if S_TEXT in special_feature:
|
55
|
-
return input_series.apply(self.__preprocess_text)
|
56
|
-
elif raw_feature in [R_INT, R_FLOAT]:
|
57
|
-
return input_series.apply(self.__numeric_namespace_generator, args=(feature_name,))
|
58
|
-
elif raw_feature == R_CATEGORY:
|
59
|
-
return input_series.apply(self.__categorical_namespace_generator, args=(feature_name,))
|
60
|
-
else:
|
61
|
-
raise ValueError(
|
62
|
-
f"Received unsupported raw_feature_type '{str(raw_feature)}' special_feature_type '{str(special_feature)}'"
|
63
|
-
f" for feature '{feature_name}' for class {self.__class__.__name__}."
|
64
|
-
)
|
65
|
-
|
66
|
-
def __preprocess_text(self, s) -> str:
|
67
|
-
if pd.isnull(s):
|
68
|
-
return ""
|
69
|
-
s = " ".join(str(s).split())
|
70
|
-
# Added split to remove tabs spaces since tab is used as separator
|
71
|
-
text = self.__sanify(s)
|
72
|
-
return text
|
73
|
-
|
74
|
-
def __sanify(self, s) -> str:
|
75
|
-
"""
|
76
|
-
The sanify is performed because : and | are reserved by vowpal wabbit for distinguishing namespaces and numeric
|
77
|
-
data
|
78
|
-
@param s: input string
|
79
|
-
@returns string
|
80
|
-
"""
|
81
|
-
return str(s).replace(":", ";").replace("|", "/")
|
82
|
-
|
83
|
-
def __numeric_namespace_generator(self, feature, feature_name) -> str:
|
84
|
-
if pd.isnull(feature):
|
85
|
-
return ""
|
86
|
-
return feature_name + ":" + str(feature)
|
87
|
-
|
88
|
-
def __categorical_namespace_generator(self, feature, feature_name) -> str:
|
89
|
-
if pd.isnull(feature):
|
90
|
-
return ""
|
91
|
-
else:
|
92
|
-
feature = str(feature).replace(" ", "_")
|
93
|
-
return feature_name + "=" + self.__sanify(feature)
|
File without changes
|
{autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/LICENSE
RENAMED
File without changes
|
{autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/NOTICE
RENAMED
File without changes
|
{autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|
{autogluon.tabular-1.2.1b20250311.dist-info → autogluon.tabular-1.2.1b20250312.dist-info}/zip-safe
RENAMED
File without changes
|