openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef-4.0.0a3.dist-info/METADATA +177 -0
- openstef-4.0.0a3.dist-info/RECORD +4 -0
- {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
- openstef/__init__.py +0 -14
- openstef/__main__.py +0 -3
- openstef/app_settings.py +0 -19
- openstef/data/NL_terrestrial_radiation.csv +0 -25585
- openstef/data/NL_terrestrial_radiation.csv.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
- openstef/data/dutch_holidays.csv +0 -1759
- openstef/data/dutch_holidays.csv.license +0 -3
- openstef/data/pv_single_coefs.csv +0 -601
- openstef/data/pv_single_coefs.csv.license +0 -3
- openstef/data_classes/__init__.py +0 -3
- openstef/data_classes/data_prep.py +0 -99
- openstef/data_classes/model_specifications.py +0 -30
- openstef/data_classes/prediction_job.py +0 -135
- openstef/data_classes/split_function.py +0 -97
- openstef/enums.py +0 -140
- openstef/exceptions.py +0 -74
- openstef/feature_engineering/__init__.py +0 -3
- openstef/feature_engineering/apply_features.py +0 -138
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
- openstef/feature_engineering/cyclic_features.py +0 -161
- openstef/feature_engineering/data_preparation.py +0 -152
- openstef/feature_engineering/feature_adder.py +0 -206
- openstef/feature_engineering/feature_applicator.py +0 -202
- openstef/feature_engineering/general.py +0 -141
- openstef/feature_engineering/holiday_features.py +0 -231
- openstef/feature_engineering/lag_features.py +0 -165
- openstef/feature_engineering/missing_values_transformer.py +0 -141
- openstef/feature_engineering/rolling_features.py +0 -58
- openstef/feature_engineering/weather_features.py +0 -492
- openstef/metrics/__init__.py +0 -3
- openstef/metrics/figure.py +0 -303
- openstef/metrics/metrics.py +0 -486
- openstef/metrics/reporter.py +0 -222
- openstef/model/__init__.py +0 -3
- openstef/model/basecase.py +0 -82
- openstef/model/confidence_interval_applicator.py +0 -242
- openstef/model/fallback.py +0 -77
- openstef/model/metamodels/__init__.py +0 -3
- openstef/model/metamodels/feature_clipper.py +0 -90
- openstef/model/metamodels/grouped_regressor.py +0 -222
- openstef/model/metamodels/missing_values_handler.py +0 -138
- openstef/model/model_creator.py +0 -214
- openstef/model/objective.py +0 -426
- openstef/model/objective_creator.py +0 -65
- openstef/model/regressors/__init__.py +0 -3
- openstef/model/regressors/arima.py +0 -197
- openstef/model/regressors/custom_regressor.py +0 -64
- openstef/model/regressors/dazls.py +0 -116
- openstef/model/regressors/flatliner.py +0 -95
- openstef/model/regressors/gblinear_quantile.py +0 -334
- openstef/model/regressors/lgbm.py +0 -29
- openstef/model/regressors/linear.py +0 -90
- openstef/model/regressors/linear_quantile.py +0 -305
- openstef/model/regressors/regressor.py +0 -114
- openstef/model/regressors/xgb.py +0 -52
- openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
- openstef/model/regressors/xgb_quantile.py +0 -228
- openstef/model/serializer.py +0 -431
- openstef/model/standard_deviation_generator.py +0 -81
- openstef/model_selection/__init__.py +0 -3
- openstef/model_selection/model_selection.py +0 -311
- openstef/monitoring/__init__.py +0 -3
- openstef/monitoring/performance_meter.py +0 -92
- openstef/monitoring/teams.py +0 -203
- openstef/pipeline/__init__.py +0 -3
- openstef/pipeline/create_basecase_forecast.py +0 -133
- openstef/pipeline/create_component_forecast.py +0 -168
- openstef/pipeline/create_forecast.py +0 -171
- openstef/pipeline/optimize_hyperparameters.py +0 -317
- openstef/pipeline/train_create_forecast_backtest.py +0 -163
- openstef/pipeline/train_model.py +0 -561
- openstef/pipeline/utils.py +0 -52
- openstef/postprocessing/__init__.py +0 -3
- openstef/postprocessing/postprocessing.py +0 -275
- openstef/preprocessing/__init__.py +0 -3
- openstef/preprocessing/preprocessing.py +0 -42
- openstef/settings.py +0 -15
- openstef/tasks/__init__.py +0 -3
- openstef/tasks/calculate_kpi.py +0 -324
- openstef/tasks/create_basecase_forecast.py +0 -118
- openstef/tasks/create_components_forecast.py +0 -162
- openstef/tasks/create_forecast.py +0 -145
- openstef/tasks/create_solar_forecast.py +0 -420
- openstef/tasks/create_wind_forecast.py +0 -80
- openstef/tasks/optimize_hyperparameters.py +0 -135
- openstef/tasks/split_forecast.py +0 -273
- openstef/tasks/train_model.py +0 -224
- openstef/tasks/utils/__init__.py +0 -3
- openstef/tasks/utils/dependencies.py +0 -107
- openstef/tasks/utils/predictionjobloop.py +0 -243
- openstef/tasks/utils/taskcontext.py +0 -160
- openstef/validation/__init__.py +0 -3
- openstef/validation/validation.py +0 -322
- openstef-3.4.56.dist-info/METADATA +0 -154
- openstef-3.4.56.dist-info/RECORD +0 -102
- openstef-3.4.56.dist-info/top_level.txt +0 -1
- /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
|
@@ -1,222 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2021 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
"""This module defines the grouped regressor."""
|
|
5
|
-
from typing import Any, Callable, Union
|
|
6
|
-
|
|
7
|
-
import joblib
|
|
8
|
-
import numpy as np
|
|
9
|
-
import pandas as pd
|
|
10
|
-
from pandas.core.groupby.generic import DataFrameGroupBy
|
|
11
|
-
from sklearn.base import BaseEstimator, MetaEstimatorMixin, RegressorMixin, clone
|
|
12
|
-
from sklearn.utils.validation import check_is_fitted
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class GroupedRegressor(BaseEstimator, RegressorMixin, MetaEstimatorMixin):
    """Meta-model that trains an instance of the base estimator for each key of a groupby operation applied on the data.

    The base estimator is a sklearn regressor, the groupby is performed on the columns specified in parameters.
    Moreover fit and predict methods can be performed in parallel for each group key thanks to joblib.

    Example:

    .. code-block:: md

        data = | index | group | x0 | x1 | x3 | y |
               |   0   |   1   | .. | .. | .. | . |
               |   1   |   2   | .. | .. | .. | . |
               |   2   |   1   | .. | .. | .. | . |
               |   3   |   2   | .. | .. | .. | . |

                               [      X     ][ Y ]

    The GroupedRegressor on the data with the group_columns='group' fits 2 models:
        - The model 1 with the row 0 and 2, columns x0, x1 and x3 as the features and column y as the target.
        - The model 2 with the row 1 and 3, columns x0, x1 and x3 as the features and column y as the target.

    Args:
        base_estimator: Regressor that is cloned and fitted once per group.

        group_columns: Name(s) of the column(s) used as the key for groupby operation.
            A single name/position is wrapped into a one-element list.

        n_jobs: default=1
            The maximum number of concurrently running jobs,
            such as the number of Python worker processes when backend="multiprocessing"
            or the size of the thread-pool when backend="threading".
            Work is only dispatched through joblib when ``n_jobs > 1``.

    Attributes:
        feature_names_: All input features (without group_columns).

        estimators_:
            Dictionary that stores the fitted estimators for each group.
            The keys are the keys of grouping and the values are the regressors fitted on the grouped data.

    """

    def __init__(
        self,
        base_estimator: RegressorMixin,
        group_columns: Union[str, int, list[str], list[int]],
        n_jobs: int = 1,
    ):
        """Initialize meta model."""
        self.base_estimator = base_estimator
        # A single column label is normalised to a list so that the rest of the
        # class can always iterate over ``self.group_columns``.
        if isinstance(group_columns, (int, str)):
            self.group_columns = [group_columns]
        else:
            self.group_columns = group_columns
        self.n_jobs = n_jobs

    def _get_tags(self):
        """Expose the estimator tags of the wrapped base estimator."""
        return self.base_estimator._get_tags()

    def _check_group_columns(self, df):
        """Validate that ``group_columns`` is a list whose entries are all columns of ``df``.

        Raises:
            ValueError: If ``group_columns`` is not a list or a group column is missing in ``df``.

        """
        if not isinstance(self.group_columns, list):
            raise ValueError(
                "The group columns parameter should be a list, it gets a {}".format(
                    type(self.group_columns)
                )
            )
        available_columns = list(df.columns)
        for c in self.group_columns:
            if c not in available_columns:
                raise ValueError("The group column {} is missing!".format(c))

    def _partial_fit(
        self, group: Any, df_group: pd.DataFrame, eval_set=None, **kwargs
    ) -> tuple[Any, BaseEstimator]:
        """Fit a fresh clone of the base estimator on the rows of one group.

        Args:
            group: Key of the group as produced by the groupby.
            df_group: Rows of that group, holding the feature columns and ``__target__``.
            eval_set: Optional list of dataframes (features, group columns and
                ``__target__``); only the rows belonging to ``group`` are forwarded
                to the estimator.
            **kwargs: Extra keyword arguments forwarded to ``estimator.fit``.

        Returns:
            The group key together with the fitted estimator.

        """
        estimator = clone(self.base_estimator)
        X = df_group.loc[:, self.feature_names_]
        y = df_group.loc[:, "__target__"]

        if eval_set is None:
            return (group, estimator.fit(X, y, **kwargs))

        group_eval_set = []
        for df_eval in eval_set:
            # Row-wise match on *all* group columns. Flattening the 2D comparison
            # would produce a mask of the wrong length for several group columns.
            in_group = (df_eval[self.group_columns] == group).all(axis=1).to_numpy()
            group_eval_set.append(
                (
                    df_eval.loc[in_group, self.feature_names_],
                    df_eval.loc[in_group, "__target__"],
                )
            )

        return (group, estimator.fit(X, y, eval_set=group_eval_set, **kwargs))

    def _partial_predict(self, group, df_group, **kwargs):
        """Predict the rows of one group with the estimator fitted for that group."""
        return self.estimators_[group].predict(df_group, **kwargs)

    @classmethod
    def grouped_compute(
        cls,
        df: pd.DataFrame,
        group_columns: Union[list[str], list[int]],
        func: Callable[[tuple, pd.DataFrame], np.array],
        n_jobs: int = 1,
        eval_set=None,
    ) -> tuple[tuple[np.array, ...], DataFrameGroupBy, pd.DataFrame]:
        """Computes the specified function on each group defined by the grouping columns.

        It is a utility function used to perform fit and predict on each group.
        The df_res is an empty dataframe, indexed like the reset input, that callers
        can fill with the aggregated results of each group. The group_res holds one
        result per group. The gb is the grouping object.

        Args:
            df: DataFrame containing the input data necessary for the computation.
            group_columns: List of the columns used for the groupby operation.
            func: Function that takes the group key and the corresponding data of this group
                (original index restored, group columns dropped) and performs the computation
                on this group.
            n_jobs: The maximum number of concurrently running jobs; joblib is only used
                when it is greater than 1.
            eval_set: Not used by this method; kept for interface compatibility.

        Returns:
            The results of each group (as a list), the grouping object and the (empty)
            global dataframe of results.

        """
        index_name = df.index.name or "index"
        df_reset = df.reset_index()

        df_res = pd.DataFrame(index=df_reset.index)

        gb = df_reset.groupby(group_columns)

        if n_jobs > 1:
            # Preferred scaling is at cluster level (e.g. k8s/serverless) instead of process level
            group_res = joblib.Parallel(n_jobs=n_jobs)(
                joblib.delayed(func)(
                    group, df_group.set_index(index_name).drop(group_columns, axis=1)
                )
                for group, df_group in gb
            )
        else:
            # Materialised as a list so both branches return the same kind of object
            group_res = [
                func(group, df_group.set_index(index_name).drop(group_columns, axis=1))
                for group, df_group in gb
            ]
        return group_res, gb, df_res

    def _grouped_predict(self, df: pd.DataFrame, n_jobs: int = 1, **kwargs) -> np.array:
        """Predict every group separately and reassemble the results in input row order."""
        group_res, gb, df_res = self.grouped_compute(
            df,
            self.group_columns,
            lambda group, df_group: self._partial_predict(group, df_group, **kwargs),
            n_jobs,
        )

        # ``gb.groups`` maps each key to the positional labels of its rows in the
        # reset dataframe, which is exactly how ``df_res`` is indexed.
        for (group, group_index), result in zip(gb.groups.items(), group_res):
            df_res.loc[group_index, "__result__"] = np.array(result)

        return df_res["__result__"].to_numpy()

    def _grouped_fit(
        self, df: pd.DataFrame, n_jobs: int = 1, eval_set=None, **kwargs
    ) -> dict[Any, BaseEstimator]:
        """Fit one estimator per group and return them keyed by group."""
        group_res, _, _ = self.grouped_compute(
            df,
            self.group_columns,
            lambda group, df_group: self._partial_fit(
                group, df_group, eval_set=eval_set, **kwargs
            ),
            n_jobs,
        )
        return dict(group_res)

    def fit(self, x: np.ndarray, y: np.ndarray, eval_set=None, **kwargs):
        """Fit the model.

        Args:
            x: Input data containing both the features and the group columns.
            y: Target values.
            eval_set: Optional iterable of ``(x_set, y_set)`` pairs; each pair is split
                per group and forwarded to the base estimator's ``fit``.
            **kwargs: Extra keyword arguments forwarded to the base estimator's ``fit``.

        Returns:
            The fitted meta-model (``self``).

        Raises:
            ValueError: If a group column is missing in ``x`` or in an evaluation set.

        """
        df = pd.DataFrame(x).copy(deep=True)
        self._check_group_columns(df)

        eval_df = None
        if eval_set is not None:
            eval_df = []
            for x_set, y_set in eval_set:
                # Convert first: the column check needs a DataFrame, x_set may be an array
                df_set = pd.DataFrame(x_set).copy(deep=True)
                self._check_group_columns(df_set)
                df_set["__target__"] = y_set
                eval_df.append(df_set)

        self.feature_names_ = [
            c for c in list(df.columns) if c not in self.group_columns
        ]
        df.loc[:, "__target__"] = y
        self.estimators_ = self._grouped_fit(
            df, self.n_jobs, eval_set=eval_df, **kwargs
        )
        return self

    def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """Make a prediction.

        Args:
            x: Input data containing both the features and the group columns.
            **kwargs: Extra keyword arguments forwarded to each estimator's ``predict``.

        Returns:
            Predictions in the same row order as ``x``.

        Raises:
            ValueError: If a group column is missing in ``x``.

        """
        check_is_fitted(self)
        df = pd.DataFrame(x)
        self._check_group_columns(df)
        return self._grouped_predict(df, self.n_jobs, **kwargs)
|
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
"""This module defines the missing value handler."""
|
|
5
|
-
from typing import Union
|
|
6
|
-
|
|
7
|
-
import numpy as np
|
|
8
|
-
import pandas as pd
|
|
9
|
-
from sklearn.base import BaseEstimator, MetaEstimatorMixin, RegressorMixin, clone
|
|
10
|
-
from sklearn.impute import SimpleImputer
|
|
11
|
-
from sklearn.pipeline import Pipeline
|
|
12
|
-
from sklearn.preprocessing import FunctionTransformer
|
|
13
|
-
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class MissingValuesHandler(BaseEstimator, RegressorMixin, MetaEstimatorMixin):
    """Class for a meta-model that handles missing values and removes columns filled exclusively by NaN.

    It's a pipeline of:

    - An Imputation transformer for completing missing values.
    - A Regressor fitted on the filled data.

    Args:
        base_estimator: Regressor used in the pipeline.
        missing_values: The placeholder for the missing values. All occurrences of
            `missing_values` will be imputed. For pandas' dataframes with
            nullable integer dtypes with missing values, `missing_values`
            should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.
        imputation_strategy: The imputation strategy.
            - If None no imputation is performed.
            - If "mean", then replace missing values using the mean along
            each column. Can only be used with numeric data.
            - If "median", then replace missing values using the median along
            each column. Can only be used with numeric data.
            - If "most_frequent", then replace missing using the most frequent
            value along each column. Can be used with strings or numeric data.
            If there is more than one such value, only the smallest is returned.
            - If "constant", then replace missing values with fill_value. Can be
            used with strings or numeric data.
        fill_value: When strategy == "constant", fill_value is used to replace all
            occurrences of missing_values.
            If left to the default, fill_value will be 0 when imputing numerical
            data and "missing_value" for strings or object data types.

    Attributes:
        feature_in_names_:
            All input features.

        non_null_columns_:
            Valid features used by the regressor.

        n_features_in_:
            Number of input features.

        regressor_: RegressorMixin
            Regressor fitted on valid columns.

        imputer_: SimpleImputer
            Imputer for missing values fitted on valid columns
            (an identity ``FunctionTransformer`` when no strategy is given).

        pipeline_: Pipeline
            Pipeline that chains the imputer and the regressor.

        feature_importances_: ndarray (n_features_in_, )
            The feature importances from the regressor for valid features and zero otherwise.
            NOTE(review): this attribute is not assigned anywhere in this class;
            presumably subclasses set it - confirm.

    """

    def __init__(
        self,
        base_estimator: RegressorMixin,
        missing_values: Union[int, float, str, None] = np.nan,
        imputation_strategy: str = None,
        fill_value: Union[str, int, float] = None,
    ):
        """Initialize missing values handler."""
        self.base_estimator = base_estimator
        self.missing_values = missing_values
        self.imputation_strategy = imputation_strategy
        self.fill_value = fill_value

    def _get_tags(self):
        """Return the base estimator's tags, adjusted for this wrapper.

        NaN input is only advertised as allowed when an imputation strategy is set.
        """
        tags = self.base_estimator._get_tags()
        tags["requires_y"] = True
        tags["multioutput"] = False
        tags["allow_nan"] = self.imputation_strategy is not None
        return tags

    def fit(self, x, y):
        """Fit model.

        Columns of ``x`` that contain only null values are dropped before the
        imputer/regressor pipeline is fitted.

        Args:
            x: Input features (DataFrame or array-like).
            y: Numeric target values.

        Returns:
            The fitted handler (``self``).

        """
        # Only the validated ``y`` is kept; ``x`` stays as passed so that DataFrame
        # column labels survive.
        # NOTE(review): recent scikit-learn releases rename ``force_all_finite`` to
        # ``ensure_all_finite`` - confirm against the pinned version before upgrading.
        _, y = check_X_y(x, y, force_all_finite="allow-nan", y_numeric=True)
        # isinstance (not an exact type comparison) so DataFrame subclasses keep
        # their column labels instead of being rebuilt from raw values.
        if not isinstance(x, pd.DataFrame):
            x = pd.DataFrame(np.asarray(x))
        self.feature_in_names_ = list(x.columns)
        self.n_features_in_ = x.shape[1]

        # Remove always null columns
        is_column_null = x.isnull().all(axis="index")
        self.non_null_columns_ = list(x.columns[~is_column_null])

        self.regressor_ = clone(self.base_estimator)

        # Build the proper imputation transformer
        # - Identity function if strategy is None
        # - SimpleImputer with the dedicated strategy
        if self.imputation_strategy is None:
            self.imputer_ = FunctionTransformer(func=self._identity)
        else:
            self.imputer_ = SimpleImputer(
                missing_values=self.missing_values,
                strategy=self.imputation_strategy,
                fill_value=self.fill_value,
            )

        self.pipeline_ = Pipeline(
            [("imputer", self.imputer_), ("regressor", self.regressor_)]
        )

        # Fit only on non_null_columns
        self.pipeline_.fit(x[self.non_null_columns_], y)

        return self

    @classmethod
    def _identity(cls, x):
        """Return ``x`` unchanged (used as the no-op imputation step)."""
        return x

    def predict(self, x):
        """Make a prediction.

        Args:
            x: Input features (DataFrame or array-like) laid out like the data used in ``fit``.

        Returns:
            The prediction of the fitted pipeline on the columns kept during ``fit``.

        """
        check_is_fitted(self)
        check_array(
            x,
            force_all_finite="allow-nan",
        )
        if not isinstance(x, pd.DataFrame):
            x = pd.DataFrame(np.asarray(x))
        return self.pipeline_.predict(x[self.non_null_columns_])
|
openstef/model/model_creator.py
DELETED
|
@@ -1,214 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
|
2
|
-
#
|
|
3
|
-
# SPDX-License-Identifier: MPL-2.0
|
|
4
|
-
import logging
|
|
5
|
-
from typing import Union
|
|
6
|
-
|
|
7
|
-
import structlog
|
|
8
|
-
|
|
9
|
-
from openstef.enums import ModelType
|
|
10
|
-
from openstef.model.regressors.arima import ARIMAOpenstfRegressor
|
|
11
|
-
from openstef.model.regressors.custom_regressor import is_custom_type, load_custom_model
|
|
12
|
-
from openstef.model.regressors.gblinear_quantile import GBLinearQuantileOpenstfRegressor
|
|
13
|
-
from openstef.model.regressors.lgbm import LGBMOpenstfRegressor
|
|
14
|
-
from openstef.model.regressors.linear import LinearOpenstfRegressor
|
|
15
|
-
from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor
|
|
16
|
-
from openstef.model.regressors.regressor import OpenstfRegressor
|
|
17
|
-
from openstef.model.regressors.flatliner import FlatlinerRegressor
|
|
18
|
-
from openstef.model.regressors.xgb import XGBOpenstfRegressor
|
|
19
|
-
from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor
|
|
20
|
-
from openstef.model.regressors.xgb_multioutput_quantile import (
|
|
21
|
-
XGBMultiOutputQuantileOpenstfRegressor,
|
|
22
|
-
)
|
|
23
|
-
from openstef.settings import Settings
|
|
24
|
-
|
|
25
|
-
# Configure structlog so that records below the configured level are dropped.
_log_level = logging.getLevelName(Settings.log_level)
structlog.configure(
    wrapper_class=structlog.make_filtering_bound_logger(_log_level)
)
logger = structlog.get_logger(__name__)

# Keyword groups shared by several model types. They are always unpacked into
# new lists below, so no two model types share a list object.
_XGB_QUANTILE_KWARGS = [
    "quantiles",
    "gamma",
    "colsample_bytree",
    "subsample",
    "min_child_weight",
    "max_depth",
    "early_stopping_rounds",
]
_IMPUTATION_KWARGS = [
    "missing_values",
    "imputation_strategy",
    "fill_value",
]
_LINEAR_SHARED_KWARGS = [
    *_IMPUTATION_KWARGS,
    "weight_scale_percentile",
    "weight_exponent",
    "weight_floor",
    "no_fill_future_values_features",
]

# For every built-in model type: the keyword arguments that may be forwarded
# to the constructor of the corresponding regressor.
valid_model_kwargs = {
    ModelType.XGB: [
        "n_estimators",
        "objective",
        "max_depth",
        "learning_rate",
        "verbosity",
        "booster",
        "tree_method",
        "gamma",
        "min_child_weight",
        "max_delta_step",
        "subsample",
        "colsample_bytree",
        "colsample_bylevel",
        "colsample_bynode",
        "reg_alpha",
        "reg_lambda",
        "scale_pos_weight",
        "base_score",
        "missing",
        "num_parallel_tree",
        "kwargs",
        "random_state",
        "n_jobs",
        "monotone_constraints",
        "interaction_constraints",
        "importance_type",
        "gpu_id",
        "validate_parameters",
        "early_stopping_rounds",
    ],
    ModelType.LGB: [
        "boosting_type",
        "objective",
        "num_leaves",
        "max_depth",
        "learning_rate",
        "n_estimators",
        "subsample_for_bin",
        "min_split_gain",
        "min_child_weight",
        "min_child_samples",
        "subsample",
        "subsample_freq",
        "colsample_bytree",
        "reg_alpha",
        "reg_lambda",
        "random_state",
        "n_jobs",
        "silent",
        "importance_type",
        "early_stopping_rounds",
    ],
    ModelType.XGB_QUANTILE: [*_XGB_QUANTILE_KWARGS],
    ModelType.XGB_MULTIOUTPUT_QUANTILE: [
        *_XGB_QUANTILE_KWARGS,
        "arctan_smoothing",
    ],
    ModelType.LINEAR: [*_IMPUTATION_KWARGS],
    ModelType.FLATLINER: [
        "quantiles",
    ],
    ModelType.LINEAR_QUANTILE: [
        "alpha",
        "quantiles",
        "solver",
        *_LINEAR_SHARED_KWARGS,
    ],
    ModelType.GBLINEAR_QUANTILE: [
        "quantiles",
        *_LINEAR_SHARED_KWARGS,
        "clipped_features",
        "learning_rate",
        "num_boost_round",
        "early_stopping_rounds",
        "reg_alpha",
        "reg_lambda",
        "updater",
        "feature_selector",
        "top_k",
    ],
    ModelType.ARIMA: [
        "backtest_max_horizon",
        "order",
        "seasonal_order",
        "trend",
    ],
}
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
class ModelCreator:
    """Factory that instantiates machine learning models from a model type."""

    # Built-in model type -> regressor class used to construct it
    MODEL_CONSTRUCTORS = {
        ModelType.XGB: XGBOpenstfRegressor,
        ModelType.LGB: LGBMOpenstfRegressor,
        ModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
        ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
        ModelType.LINEAR: LinearOpenstfRegressor,
        ModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
        ModelType.GBLINEAR_QUANTILE: GBLinearQuantileOpenstfRegressor,
        ModelType.ARIMA: ARIMAOpenstfRegressor,
        ModelType.FLATLINER: FlatlinerRegressor,
    }

    @staticmethod
    def create_model(model_type: Union[ModelType, str], **kwargs) -> OpenstfRegressor:
        """Build the model that belongs to ``model_type``.

        Custom model types are resolved through ``load_custom_model`` and declare
        their own accepted keyword arguments; built-in types are looked up in
        ``MODEL_CONSTRUCTORS`` and ``valid_model_kwargs``.

        Args:
            model_type: Model type to construct.
            kwargs: Optional keyword arguments for the model; keys the model
                does not accept are silently dropped.

        Raises:
            NotImplementedError: When using an invalid model_type.
            ImportError: When the resolved constructor is ``None``.

        Returns:
            OpenSTEF model

        """
        try:
            # Any ValueError raised while resolving the type (e.g. an unknown
            # model_type string) is reported as NotImplementedError below.
            if is_custom_type(model_type):
                constructor = load_custom_model(model_type)
                accepted = constructor.valid_kwargs()
            else:
                model_type = ModelType(model_type)
                constructor = ModelCreator.MODEL_CONSTRUCTORS[model_type]
                accepted = valid_model_kwargs[model_type]
        except ValueError as e:
            known_types = [member.value for member in ModelType]
            raise NotImplementedError(
                f"No constructor for '{model_type}', "
                f"valid model_types are: {known_types} "
                "or import a custom model"
            ) from e

        # A missing constructor hints at an optional dependency that is not installed
        if constructor is None:
            raise ImportError(
                f"Constructor not available for '{model_type}'. "
                "Perhaps you forgot to install an optional dependency? "
                "Please refer to the ReadMe for instructions"
            )

        # Forward only the arguments the model accepts, to prevent warnings
        filtered_kwargs = {name: kwargs[name] for name in kwargs if name in accepted}
        return constructor(**filtered_kwargs)
|