autogluon.tabular 1.2.1b20250310__py3-none-any.whl → 1.2.1b20250312__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -107,7 +107,6 @@ hyperparameter_config_dict = dict(
107
107
  "XGB": {},
108
108
  # 'FASTAI': {}, # FastAI gets killed if the dataset is large (400K rows).
109
109
  "AG_AUTOMM": {},
110
- "VW": {},
111
110
  },
112
111
  # Hyperparameters intended to find an interpretable model which doesn't sacrifice predictive accuracy
113
112
  interpretable={
@@ -22,6 +22,5 @@ from .tabpfn.tabpfn_model import TabPFNModel
22
22
  from .tabpfnmix.tabpfnmix_model import TabPFNMixModel
23
23
  from .tabular_nn.torch.tabular_nn_torch import TabularNeuralNetTorchModel
24
24
  from .text_prediction.text_prediction_v1_model import TextPredictorModel
25
- from .vowpalwabbit.vowpalwabbit_model import VowpalWabbitModel
26
25
  from .xgboost.xgboost_model import XGBoostModel
27
26
  from .xt.xt_model import XTModel
@@ -25,7 +25,6 @@ from ..models import (
25
25
  TabPFNModel,
26
26
  TabularNeuralNetTorchModel,
27
27
  TextPredictorModel,
28
- VowpalWabbitModel,
29
28
  XGBoostModel,
30
29
  XTModel,
31
30
  )
@@ -51,7 +50,6 @@ REGISTERED_MODEL_CLS_LST = [
51
50
  TabPFNModel,
52
51
  TabPFNMixModel,
53
52
  FastTextModel,
54
- VowpalWabbitModel,
55
53
  GreedyWeightedEnsembleModel,
56
54
  SimpleWeightedEnsembleModel,
57
55
  RuleFitModel,
@@ -1,4 +1,4 @@
1
1
  """This is the autogluon version file."""
2
2
 
3
- __version__ = "1.2.1b20250310"
3
+ __version__ = "1.2.1b20250312"
4
4
  __lite__ = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: autogluon.tabular
3
- Version: 1.2.1b20250310
3
+ Version: 1.2.1b20250312
4
4
  Summary: Fast and Accurate ML in 3 Lines of Code
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -41,19 +41,19 @@ Requires-Dist: scipy<1.16,>=1.5.4
41
41
  Requires-Dist: pandas<2.3.0,>=2.0.0
42
42
  Requires-Dist: scikit-learn<1.5.3,>=1.4.0
43
43
  Requires-Dist: networkx<4,>=3.0
44
- Requires-Dist: autogluon.core==1.2.1b20250310
45
- Requires-Dist: autogluon.features==1.2.1b20250310
44
+ Requires-Dist: autogluon.core==1.2.1b20250312
45
+ Requires-Dist: autogluon.features==1.2.1b20250312
46
46
  Provides-Extra: all
47
- Requires-Dist: lightgbm<4.7,>=4.0; extra == "all"
48
- Requires-Dist: spacy<3.8; extra == "all"
49
- Requires-Dist: torch<2.6,>=2.2; extra == "all"
50
- Requires-Dist: huggingface-hub[torch]; extra == "all"
51
- Requires-Dist: autogluon.core[all]==1.2.1b20250310; extra == "all"
52
- Requires-Dist: einops<0.9,>=0.7; extra == "all"
53
- Requires-Dist: numpy<2.0.0,>=1.25; extra == "all"
54
47
  Requires-Dist: xgboost<2.2,>=2.0; extra == "all"
48
+ Requires-Dist: torch<2.6,>=2.2; extra == "all"
55
49
  Requires-Dist: catboost<1.3,>=1.2; extra == "all"
50
+ Requires-Dist: einops<0.9,>=0.7; extra == "all"
56
51
  Requires-Dist: fastai<2.8,>=2.3.1; extra == "all"
52
+ Requires-Dist: spacy<3.8; extra == "all"
53
+ Requires-Dist: autogluon.core[all]==1.2.1b20250312; extra == "all"
54
+ Requires-Dist: huggingface-hub[torch]; extra == "all"
55
+ Requires-Dist: numpy<2.0.0,>=1.25; extra == "all"
56
+ Requires-Dist: lightgbm<4.7,>=4.0; extra == "all"
57
57
  Provides-Extra: catboost
58
58
  Requires-Dist: numpy<2.0.0,>=1.25; extra == "catboost"
59
59
  Requires-Dist: catboost<1.3,>=1.2; extra == "catboost"
@@ -66,7 +66,7 @@ Requires-Dist: imodels<1.4.0,>=1.3.10; extra == "imodels"
66
66
  Provides-Extra: lightgbm
67
67
  Requires-Dist: lightgbm<4.7,>=4.0; extra == "lightgbm"
68
68
  Provides-Extra: ray
69
- Requires-Dist: autogluon.core[all]==1.2.1b20250310; extra == "ray"
69
+ Requires-Dist: autogluon.core[all]==1.2.1b20250312; extra == "ray"
70
70
  Provides-Extra: skex
71
71
  Requires-Dist: scikit-learn-intelex<2025.1,>=2024.0; extra == "skex"
72
72
  Provides-Extra: skl2onnx
@@ -82,7 +82,6 @@ Requires-Dist: torch<2.6,>=2.2; extra == "tabpfnmix"
82
82
  Requires-Dist: huggingface-hub[torch]; extra == "tabpfnmix"
83
83
  Requires-Dist: einops<0.9,>=0.7; extra == "tabpfnmix"
84
84
  Provides-Extra: tests
85
- Requires-Dist: tabpfn<0.2,>=0.1.11; extra == "tests"
86
85
  Requires-Dist: torch<2.6,>=2.2; extra == "tests"
87
86
  Requires-Dist: huggingface-hub[torch]; extra == "tests"
88
87
  Requires-Dist: einops<0.9,>=0.7; extra == "tests"
@@ -92,9 +91,6 @@ Requires-Dist: onnxruntime<1.20.0,>=1.17.0; extra == "tests"
92
91
  Requires-Dist: onnxruntime-gpu<1.20.0,>=1.17.0; extra == "tests"
93
92
  Requires-Dist: onnx<1.18.0,>=1.13.0; platform_system != "Windows" and extra == "tests"
94
93
  Requires-Dist: onnx<1.16.2,>=1.13.0; platform_system == "Windows" and extra == "tests"
95
- Requires-Dist: vowpalwabbit<9.10,>=9; (python_version < "3.11" and sys_platform != "darwin") and extra == "tests"
96
- Provides-Extra: vowpalwabbit
97
- Requires-Dist: vowpalwabbit<9.10,>=9; (python_version < "3.11" and sys_platform != "darwin") and extra == "vowpalwabbit"
98
94
  Provides-Extra: xgboost
99
95
  Requires-Dist: xgboost<2.2,>=2.0; extra == "xgboost"
100
96
 
@@ -1,10 +1,10 @@
1
- autogluon.tabular-1.2.1b20250310-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
1
+ autogluon.tabular-1.2.1b20250312-py3.9-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
2
2
  autogluon/tabular/__init__.py,sha256=2OXpJCvENRHubBTYNIPpHX93WWuFZzsJBtTZbNVHVas,400
3
- autogluon/tabular/version.py,sha256=sSV76kl5V2aM4pV9L3LYTKrnmgMZkThLQK3iof64UCw,91
3
+ autogluon/tabular/version.py,sha256=jJrTQDH93aen31K7aA8rJsXAtKx1fUM2eMDvRjqIPVs,91
4
4
  autogluon/tabular/configs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  autogluon/tabular/configs/config_helper.py,sha256=Pb2aW9Z9w77pYKPRVZ3nBzHY3KJaiEJSJ747zZcJIVk,21132
6
6
  autogluon/tabular/configs/feature_generator_presets.py,sha256=EV5Ym8VW15q92MwOUpTi7wZFS2QooM51fLg3RdUsn-M,1223
7
- autogluon/tabular/configs/hyperparameter_configs.py,sha256=SHR56xcHTslf5dULbl34TgcuSqZdwgI-k3zEbBg4cFE,18053
7
+ autogluon/tabular/configs/hyperparameter_configs.py,sha256=hp8J7g5GY3Couz929f1ItawobCw-isLTZJBcLoJY348,18035
8
8
  autogluon/tabular/configs/presets_configs.py,sha256=2Jlq1X9sVmVlyUxWsZpDV7ma2TncH5Y2HXDML7x2gYc,6810
9
9
  autogluon/tabular/configs/zeroshot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  autogluon/tabular/configs/zeroshot/zeroshot_portfolio_2023.py,sha256=oKO_2nEpI_EiLaUGmNN-3kPBIp5ATndbCOaVZ1m0048,29911
@@ -16,7 +16,7 @@ autogluon/tabular/experimental/plot_leaderboard.py,sha256=BN_kB-zmOZNUYWyI7z9pF6
16
16
  autogluon/tabular/learner/__init__.py,sha256=Hhmk5WpKQHohVmI-veOaKMelKJpIdzeXrmw_DPn3DTU,63
17
17
  autogluon/tabular/learner/abstract_learner.py,sha256=0kf0huvg0nphe-lrdKtNTzdIFr14jzJPsfZDRBkKo3g,55253
18
18
  autogluon/tabular/learner/default_learner.py,sha256=hjdKbcFtIQxQ3-k1LiGOo-w5sLxIIQAyFLs3-R35aw0,24781
19
- autogluon/tabular/models/__init__.py,sha256=tDVqwVG9q2ctLWRouyXeYs5NiSBnOnwh3anAfZyM3jg,1099
19
+ autogluon/tabular/models/__init__.py,sha256=fZDKUKiD9hDzEyFXXbt7_b4yADK9peREdP8QoukWukQ,1036
20
20
  autogluon/tabular/models/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  autogluon/tabular/models/_utils/rapids_utils.py,sha256=gbej9Hjn4alCWZuGN9sOLXMMAyWbgHPThTsp2feS39o,1038
22
22
  autogluon/tabular/models/_utils/torch_utils.py,sha256=dxs_KMMAOmNkRNjYf_hrzqaHIfkqn1xoKRKqCFbQ1Rk,537
@@ -130,9 +130,6 @@ autogluon/tabular/models/tabular_nn/utils/data_preprocessor.py,sha256=ypXqtxdt1q
130
130
  autogluon/tabular/models/tabular_nn/utils/nn_architecture_utils.py,sha256=tttzR5EtYcFa6sIrUG9wyegdYmYE5DPK_CiLF1-L3c8,2875
131
131
  autogluon/tabular/models/text_prediction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
132
132
  autogluon/tabular/models/text_prediction/text_prediction_v1_model.py,sha256=PBN7F98qgEAO6U76rV_hxZfAmKr_XpVKjElOdBvfX8c,1090
133
- autogluon/tabular/models/vowpalwabbit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
- autogluon/tabular/models/vowpalwabbit/vowpalwabbit_model.py,sha256=h_j33hnsuxDM0mfExpmejO2pu5lIpTm1uIEirS2OXXI,11802
135
- autogluon/tabular/models/vowpalwabbit/vowpalwabbit_utils.py,sha256=jZ0STjvqwKw8jJDeoo5yAXTvgwFvY8Fsz6OqSif_JGI,3677
136
133
  autogluon/tabular/models/xgboost/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
134
  autogluon/tabular/models/xgboost/callbacks.py,sha256=uynimXya07XQMBkDvec-7mXK6OfMGP6M8MiVYu8OVRI,7008
138
135
  autogluon/tabular/models/xgboost/xgboost_model.py,sha256=tTSnTzEot2JB0qEvhki4h3RdaLjEpfMs-jWKsxlJWO4,14304
@@ -146,7 +143,7 @@ autogluon/tabular/predictor/__init__.py,sha256=zCMgjxQlWpDWnr1l1xjBCiK3rWC3N3RoD
146
143
  autogluon/tabular/predictor/interpretable_predictor.py,sha256=5UeKgnMFsfY65tiO3kxfHBPr03lyswLrgdtjPhI0Y7Q,6934
147
144
  autogluon/tabular/predictor/predictor.py,sha256=jOkpypHAPrL2nsI4iypVkZV90TpMORK-G_Ixr3Kw3XQ,357182
148
145
  autogluon/tabular/register/__init__.py,sha256=7CLOTWIUho0wi4eAwhYJ5Y0PfvNCWKnRwlw3bwYoTNE,93
149
- autogluon/tabular/register/_ag_model_register.py,sha256=tMr-QgxgCE49tdThdSFOZaJg2D9ckDh6fiR5K4cRtvk,1564
146
+ autogluon/tabular/register/_ag_model_register.py,sha256=JNnmL6cwL_zvObRmyuRbwYsCxDT-qrVF5PY8dzJ5U9k,1518
150
147
  autogluon/tabular/register/_model_register.py,sha256=jqSg0d89dXAAcp-OT4II90ce994ByKMMzAYmpkyaRbI,6824
151
148
  autogluon/tabular/testing/__init__.py,sha256=XrEGLmMdmRT6QHNR13M9wna57LO4O3Q4tt27Ca8omAc,79
152
149
  autogluon/tabular/testing/fit_helper.py,sha256=gVHTdAsp_lSZ_qbwjXM7aA5fI32zHj3_zXwEXC9C_ds,19586
@@ -160,11 +157,11 @@ autogluon/tabular/trainer/model_presets/presets.py,sha256=bTPGPyz07a7GG6327yO6ry
160
157
  autogluon/tabular/trainer/model_presets/presets_distill.py,sha256=MnFC2GJc6RmDBNAGbsO2XMfo3PjR8cUrZoilWW8gTYQ,3295
161
158
  autogluon/tabular/tuning/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
159
  autogluon/tabular/tuning/feature_pruner.py,sha256=9iNku8gVbYEkjuKlyITPJDicsNkoraaQOlINQq9iZlQ,6877
163
- autogluon.tabular-1.2.1b20250310.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
164
- autogluon.tabular-1.2.1b20250310.dist-info/METADATA,sha256=tlHCXr3m5eN2ruU75t6rOpAGgxq2lCt1Xt4UZRYilP4,14386
165
- autogluon.tabular-1.2.1b20250310.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
166
- autogluon.tabular-1.2.1b20250310.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
167
- autogluon.tabular-1.2.1b20250310.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
168
- autogluon.tabular-1.2.1b20250310.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
169
- autogluon.tabular-1.2.1b20250310.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
170
- autogluon.tabular-1.2.1b20250310.dist-info/RECORD,,
160
+ autogluon.tabular-1.2.1b20250312.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
161
+ autogluon.tabular-1.2.1b20250312.dist-info/METADATA,sha256=er3--iygQUQAhMpOQ3Pd_WJnlXRKkGr7go0ueJRdfrA,14069
162
+ autogluon.tabular-1.2.1b20250312.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
163
+ autogluon.tabular-1.2.1b20250312.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
164
+ autogluon.tabular-1.2.1b20250312.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
165
+ autogluon.tabular-1.2.1b20250312.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
166
+ autogluon.tabular-1.2.1b20250312.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
167
+ autogluon.tabular-1.2.1b20250312.dist-info/RECORD,,
File without changes
@@ -1,286 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import logging
4
- import os
5
- import time
6
-
7
- import numpy as np
8
- import pandas as pd
9
-
10
- from autogluon.common.features.types import (
11
- R_CATEGORY,
12
- R_FLOAT,
13
- R_INT,
14
- R_OBJECT,
15
- S_IMAGE_PATH,
16
- S_TEXT_AS_CATEGORY,
17
- S_TEXT_NGRAM,
18
- S_TEXT_SPECIAL,
19
- )
20
- from autogluon.common.utils.try_import import try_import_vowpalwabbit
21
- from autogluon.core.constants import (
22
- BINARY,
23
- MULTICLASS,
24
- PROBLEM_TYPES_CLASSIFICATION,
25
- PROBLEM_TYPES_REGRESSION,
26
- REGRESSION,
27
- )
28
- from autogluon.core.models import AbstractModel
29
- from autogluon.core.utils.exceptions import TimeLimitExceeded
30
-
31
- from .vowpalwabbit_utils import VWFeaturesConverter
32
-
33
- logger = logging.getLogger(__name__)
34
-
35
-
36
- class VowpalWabbitModel(AbstractModel):
37
- """
38
- VowpalWabbit Model: https://vowpalwabbit.org/
39
-
40
- VowpalWabbit Command Line args: https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Command-line-arguments
41
-
42
- """
43
- ag_key = "VW"
44
- ag_name = "VowpalWabbit"
45
- ag_priority = 10
46
-
47
- model_internals_file_name = "model-internals.pkl"
48
-
49
- # Ref: https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Loss-functions
50
- CLASSIFICATION_LOSS_FUNCTIONS = ["logistic", "hinge"]
51
- REGRESSION_LOSS_FUNCTIONS = ["squared", "quantile", "poisson", "classic"]
52
-
53
- def __init__(self, **kwargs):
54
- super().__init__(**kwargs)
55
- self._load_model = None # Used for saving and loading internal model file
56
-
57
- # The `_preprocess` method takes the input data and transforms it to the internal representation usable by the model.
58
- # `_preprocess` is called by `preprocess` and is used during model fit and model inference.
59
- def _preprocess(self, X: pd.DataFrame, is_train=False, **kwargs) -> pd.Series:
60
- X = super()._preprocess(X, **kwargs)
61
- if is_train:
62
- self._features_converter = VWFeaturesConverter()
63
- self._feature_metadata_dict = self._feature_metadata.to_dict()
64
- # self._feature_metadata contains the information related to features metadata.
65
- X_series = self._features_converter.convert_features_to_vw_format(X, self._feature_metadata_dict)
66
- return X_series
67
-
68
- # The `_fit` method takes the input training data (and optionally the validation data) and trains the model.
69
- def _fit(
70
- self,
71
- X: pd.DataFrame,
72
- y: pd.Series,
73
- time_limit=None,
74
- verbosity=2,
75
- **kwargs, # training data # training labels
76
- ): # kwargs includes many other potential inputs, refer to AbstractModel documentation for details
77
- time_start = time.time()
78
- try_import_vowpalwabbit()
79
- import vowpalwabbit
80
-
81
- seed = 0 # Random seed
82
-
83
- # Valid self.problem_type values include ['binary', 'multiclass', 'regression', 'quantile', 'softclass']
84
- if self.problem_type not in PROBLEM_TYPES_REGRESSION + PROBLEM_TYPES_CLASSIFICATION:
85
- raise TypeError(f"Vowpal Wabbit does not support {self.problem_type}")
86
-
87
- # Certain parameters like passes are passed as hyperparameters but are not used
88
- # while initialising the model.
89
- # passes: Used as epochs
90
-
91
- params = self._get_model_params()
92
- params["loss_function"] = params.get("loss_function", self._get_default_loss_function())
93
- passes = params.pop("passes")
94
-
95
- # Make sure to call preprocess on X near the start of `_fit`.
96
- # This is necessary because the data is converted via preprocess during predict, and needs to be in the same format as during fit.
97
- X_series = self.preprocess(X, is_train=True)
98
-
99
- self._validate_loss_function(loss_function=params["loss_function"])
100
-
101
- # VW expects label from 1 to N for Binary and Multiclass classification problems
102
- # AutoGluon does label encoding from 0 to N-1, hence we increment the value of y by 1
103
- if self.problem_type != REGRESSION:
104
- y = y.apply(lambda row: row + 1)
105
- y = y.astype(str) + " "
106
-
107
- # Concatenate y and X to get the training data in VW format
108
- final_training_data = y + X_series
109
- final_training_data = final_training_data.tolist()
110
-
111
- extra_params = {
112
- "cache_file": "train.cache",
113
- "holdout_off": True,
114
- }
115
-
116
- if verbosity <= 3:
117
- extra_params["quiet"] = True
118
-
119
- # Initialize the model
120
- if self.problem_type in PROBLEM_TYPES_CLASSIFICATION:
121
- # Ref: https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Predicting-probabilities#multi-class---oaa
122
- extra_params["oaa"] = self.num_classes
123
- extra_params["probabilities"] = True
124
- self.model = vowpalwabbit.Workspace(**params, **extra_params)
125
-
126
- time_start_fit = time.time()
127
- if time_limit is not None:
128
- time_limit_fit = time_limit - (time_start_fit - time_start) - 0.3 # Account for 0.3s overhead
129
- if time_limit_fit <= 0:
130
- raise TimeLimitExceeded
131
- else:
132
- time_limit_fit = None
133
-
134
- # Train the model
135
- np.random.seed(seed)
136
- epoch = 0
137
-
138
- for epoch in range(1, passes + 1):
139
- # TODO: Add Early Stopping support via validation
140
- self._train_single_epoch(training_data=final_training_data)
141
- if time_limit_fit is not None and epoch < passes:
142
- time_fit_used = time.time() - time_start_fit
143
- time_fit_used_per_epoch = time_fit_used / epoch
144
- time_left = time_limit_fit - time_fit_used
145
- if time_left <= (time_fit_used_per_epoch * 2):
146
- logger.log(30, f"\tEarly stopping due to lack of time. Fit {epoch}/{passes} passes...")
147
- break
148
-
149
- self.params_trained["passes"] = epoch
150
-
151
- def _train_single_epoch(self, training_data):
152
- row_order = np.arange(0, len(training_data))
153
- row_order = np.random.permutation(row_order)
154
- for row_i in row_order:
155
- row = training_data[row_i]
156
- self.model.learn(row)
157
-
158
- def _validate_loss_function(self, loss_function):
159
- # Ref: https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Loss-functions
160
- if loss_function:
161
- if self.problem_type in PROBLEM_TYPES_CLASSIFICATION:
162
- assert loss_function in self.CLASSIFICATION_LOSS_FUNCTIONS, (
163
- f"For {self.problem_type} problem, VW supports: {self.CLASSIFICATION_LOSS_FUNCTIONS}. " f"Got loss_function:{loss_function}"
164
- )
165
- elif self.problem_type in PROBLEM_TYPES_REGRESSION:
166
- assert loss_function in self.REGRESSION_LOSS_FUNCTIONS, (
167
- f"For {self.problem_type} problem, VW supports: {self.REGRESSION_LOSS_FUNCTIONS}. " f"Got loss_function:{loss_function}"
168
- )
169
-
170
- def _get_default_loss_function(self) -> str:
171
- # Ref: https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Loss-functions
172
- if self.problem_type in PROBLEM_TYPES_CLASSIFICATION:
173
- return "logistic"
174
- else:
175
- return "squared"
176
-
177
- def save(self, path: str = None, verbose=True) -> str:
178
- """
179
- AutoGluon by default saves the complete Abstract Model in a pickle file format.
180
- This includes the internal self.model which is the actual model.
181
- However, saving VW model in pickle is not possible.
182
- Hence, we dump the Abstract Model by setting setting self.model as None
183
- and save self.model as a separate internal file using that model's saving mechanism
184
-
185
- :param path: path where model is to be saved
186
- :param verbose: verbosity
187
- :return: path where model is saved
188
- """
189
-
190
- self._load_model = self.model is not None
191
- __model = self.model
192
- self.model = None
193
- path = super().save(path=path, verbose=verbose)
194
- self.model = __model
195
- # Export model
196
- if self._load_model:
197
- file_path = os.path.join(path, self.model_internals_file_name)
198
- self.model.save(file_path)
199
- self._load_model = None
200
- return path
201
-
202
- @classmethod
203
- def load(cls, path: str, reset_paths=True, verbose=True):
204
- """
205
- There are two files which needs to be loaded.
206
- First is the Abstract Model pickle dump and second is the internal model file.
207
- For VW, based on different problem_type/hyperparams, loading arguments will be different
208
- """
209
- try_import_vowpalwabbit()
210
- import vowpalwabbit
211
-
212
- # Load Abstract Model. This is without the internal model
213
- model = super().load(path, reset_paths=reset_paths, verbose=verbose)
214
- params = model._get_model_params()
215
- # Load the internal model file
216
- if model._load_model:
217
- file_path = os.path.join(path, cls.model_internals_file_name)
218
-
219
- model_load_params = f" -i {file_path} --quiet"
220
- if model.problem_type in PROBLEM_TYPES_CLASSIFICATION:
221
- model_load_params += " --probabilities --loss_function=logistic"
222
- if params["sparse_weights"]:
223
- model_load_params += " --sparse_weights"
224
-
225
- model.model = vowpalwabbit.Workspace(model_load_params)
226
- model._load_model = None
227
- return model
228
-
229
- def _predict_proba(self, X, **kwargs):
230
- # Preprocess the set of X features
231
- X = self.preprocess(X, **kwargs)
232
-
233
- y_pred_proba = np.array([self.model.predict(row) for row in X])
234
- return self._convert_proba_to_unified_form(y_pred_proba)
235
-
236
- def _get_memory_size(self) -> int:
237
- # TODO: Can be improved further to make it more accurate
238
- # Returning 5MB as the value
239
- return int(5e6)
240
-
241
- # The `_set_default_params` method defines the default hyperparameters of the model.
242
- # User-specified parameters will override these values on a key-by-key basis.
243
- def _set_default_params(self):
244
- default_params = {
245
- "passes": 10, # TODO: Much better if 500+, revisit this if wanting to use VW to get strong results
246
- "bit_precision": 32,
247
- "ngram": 2,
248
- "skips": 1,
249
- "learning_rate": 1,
250
- "sparse_weights": True,
251
- }
252
- for param, val in default_params.items():
253
- self._set_default_param_value(param, val)
254
-
255
- # The `_get_default_auxiliary_params` method defines various model-agnostic parameters such as maximum memory usage and valid input column dtypes.
256
- # For most users who build custom models, they will only need to specify the valid/invalid dtypes to the model here.
257
- def _get_default_auxiliary_params(self) -> dict:
258
- default_auxiliary_params = super()._get_default_auxiliary_params()
259
- # Ignore the below mentioned special types. Only those features that are not of the below mentioned
260
- # type are passed to the model for training list are passed features
261
- extra_auxiliary_params = dict(
262
- valid_raw_types=[R_INT, R_FLOAT, R_CATEGORY, R_OBJECT], ignored_type_group_special=[S_IMAGE_PATH, S_TEXT_NGRAM, S_TEXT_AS_CATEGORY, S_TEXT_SPECIAL]
263
- )
264
- default_auxiliary_params.update(extra_auxiliary_params)
265
- return default_auxiliary_params
266
-
267
- @classmethod
268
- def _get_default_ag_args(cls) -> dict:
269
- default_ag_args = super()._get_default_ag_args()
270
- extra_ag_args = {
271
- "valid_stacker": False,
272
- }
273
- default_ag_args.update(extra_ag_args)
274
- return default_ag_args
275
-
276
- @classmethod
277
- def supported_problem_types(cls) -> list[str] | None:
278
- return ["binary", "multiclass", "regression"]
279
-
280
- def _more_tags(self):
281
- # `can_refit_full=True` because best epoch is communicated at end of `_fit`: `self.params_trained['passes'] = epoch`
282
- return {"can_refit_full": True}
283
-
284
- @classmethod
285
- def _class_tags(cls):
286
- return {"handles_text": True}
@@ -1,93 +0,0 @@
1
- import pandas as pd
2
-
3
- from autogluon.common.features.types import R_CATEGORY, R_FLOAT, R_INT, S_TEXT
4
-
5
-
6
- class VWFeaturesConverter:
7
- """
8
- Converts features in PandasDataFrame to VW format
9
- Ref: https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Input-format
10
- """
11
-
12
- PIPE = "|"
13
- SPACE = " "
14
-
15
- # TODO: Add support for different namespaces
16
-
17
- def convert_features_to_vw_format(self, X, feature_metadata) -> pd.Series:
18
- """
19
- Converts features to VW format.
20
- :param X: features
21
- :param feature_metadata: schema of X
22
- :return: Returns a series of features converted to VW format
23
- """
24
-
25
- X_out: pd.Series = None
26
-
27
- for feature in feature_metadata:
28
- raw_feature, special_feature = feature_metadata[feature]
29
- if X_out is None:
30
- X_out = (
31
- self.PIPE
32
- + self.SPACE
33
- + self.__generate_namespace_based_on_ml_type(X[feature], raw_feature, special_feature, feature).astype("str")
34
- + self.SPACE
35
- )
36
- else:
37
- X_out += (
38
- "" + self.SPACE + self.__generate_namespace_based_on_ml_type(X[feature], raw_feature, special_feature, feature).astype("str") + self.SPACE
39
- )
40
- return X_out
41
-
42
- def __generate_namespace_based_on_ml_type(self, input_series, raw_feature, special_feature, feature_name=None):
43
- """
44
- Based on the type of feature, preprocess/sanify these features so that it is in VW format
45
- Only use raw text, numeric integer, numeric decimals, and category
46
- Ref: https://github.com/autogluon/autogluon/blob/master/common/src/autogluon/common/features/types.py
47
-
48
- :param input_series: A single feature as Pandas Series
49
- :param raw_feature: Raw feature Type
50
- :param special_feature: Special Feature Type
51
- :param feature_name: Column Name of this feature
52
- :return: Preprocessed Feature as a Pandas Series
53
- """
54
- if S_TEXT in special_feature:
55
- return input_series.apply(self.__preprocess_text)
56
- elif raw_feature in [R_INT, R_FLOAT]:
57
- return input_series.apply(self.__numeric_namespace_generator, args=(feature_name,))
58
- elif raw_feature == R_CATEGORY:
59
- return input_series.apply(self.__categorical_namespace_generator, args=(feature_name,))
60
- else:
61
- raise ValueError(
62
- f"Received unsupported raw_feature_type '{str(raw_feature)}' special_feature_type '{str(special_feature)}'"
63
- f" for feature '{feature_name}' for class {self.__class__.__name__}."
64
- )
65
-
66
- def __preprocess_text(self, s) -> str:
67
- if pd.isnull(s):
68
- return ""
69
- s = " ".join(str(s).split())
70
- # Added split to remove tabs spaces since tab is used as separator
71
- text = self.__sanify(s)
72
- return text
73
-
74
- def __sanify(self, s) -> str:
75
- """
76
- The sanify is performed because : and | are reserved by vowpal wabbit for distinguishing namespaces and numeric
77
- data
78
- @param s: input string
79
- @returns string
80
- """
81
- return str(s).replace(":", ";").replace("|", "/")
82
-
83
- def __numeric_namespace_generator(self, feature, feature_name) -> str:
84
- if pd.isnull(feature):
85
- return ""
86
- return feature_name + ":" + str(feature)
87
-
88
- def __categorical_namespace_generator(self, feature, feature_name) -> str:
89
- if pd.isnull(feature):
90
- return ""
91
- else:
92
- feature = str(feature).replace(" ", "_")
93
- return feature_name + "=" + self.__sanify(feature)