openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. openstef-4.0.0a3.dist-info/METADATA +177 -0
  2. openstef-4.0.0a3.dist-info/RECORD +4 -0
  3. {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
  4. openstef/__init__.py +0 -14
  5. openstef/__main__.py +0 -3
  6. openstef/app_settings.py +0 -19
  7. openstef/data/NL_terrestrial_radiation.csv +0 -25585
  8. openstef/data/NL_terrestrial_radiation.csv.license +0 -3
  9. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  10. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
  11. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
  12. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
  13. openstef/data/dutch_holidays.csv +0 -1759
  14. openstef/data/dutch_holidays.csv.license +0 -3
  15. openstef/data/pv_single_coefs.csv +0 -601
  16. openstef/data/pv_single_coefs.csv.license +0 -3
  17. openstef/data_classes/__init__.py +0 -3
  18. openstef/data_classes/data_prep.py +0 -99
  19. openstef/data_classes/model_specifications.py +0 -30
  20. openstef/data_classes/prediction_job.py +0 -135
  21. openstef/data_classes/split_function.py +0 -97
  22. openstef/enums.py +0 -140
  23. openstef/exceptions.py +0 -74
  24. openstef/feature_engineering/__init__.py +0 -3
  25. openstef/feature_engineering/apply_features.py +0 -138
  26. openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
  27. openstef/feature_engineering/cyclic_features.py +0 -161
  28. openstef/feature_engineering/data_preparation.py +0 -152
  29. openstef/feature_engineering/feature_adder.py +0 -206
  30. openstef/feature_engineering/feature_applicator.py +0 -202
  31. openstef/feature_engineering/general.py +0 -141
  32. openstef/feature_engineering/holiday_features.py +0 -231
  33. openstef/feature_engineering/lag_features.py +0 -165
  34. openstef/feature_engineering/missing_values_transformer.py +0 -141
  35. openstef/feature_engineering/rolling_features.py +0 -58
  36. openstef/feature_engineering/weather_features.py +0 -492
  37. openstef/metrics/__init__.py +0 -3
  38. openstef/metrics/figure.py +0 -303
  39. openstef/metrics/metrics.py +0 -486
  40. openstef/metrics/reporter.py +0 -222
  41. openstef/model/__init__.py +0 -3
  42. openstef/model/basecase.py +0 -82
  43. openstef/model/confidence_interval_applicator.py +0 -242
  44. openstef/model/fallback.py +0 -77
  45. openstef/model/metamodels/__init__.py +0 -3
  46. openstef/model/metamodels/feature_clipper.py +0 -90
  47. openstef/model/metamodels/grouped_regressor.py +0 -222
  48. openstef/model/metamodels/missing_values_handler.py +0 -138
  49. openstef/model/model_creator.py +0 -214
  50. openstef/model/objective.py +0 -426
  51. openstef/model/objective_creator.py +0 -65
  52. openstef/model/regressors/__init__.py +0 -3
  53. openstef/model/regressors/arima.py +0 -197
  54. openstef/model/regressors/custom_regressor.py +0 -64
  55. openstef/model/regressors/dazls.py +0 -116
  56. openstef/model/regressors/flatliner.py +0 -95
  57. openstef/model/regressors/gblinear_quantile.py +0 -334
  58. openstef/model/regressors/lgbm.py +0 -29
  59. openstef/model/regressors/linear.py +0 -90
  60. openstef/model/regressors/linear_quantile.py +0 -305
  61. openstef/model/regressors/regressor.py +0 -114
  62. openstef/model/regressors/xgb.py +0 -52
  63. openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
  64. openstef/model/regressors/xgb_quantile.py +0 -228
  65. openstef/model/serializer.py +0 -431
  66. openstef/model/standard_deviation_generator.py +0 -81
  67. openstef/model_selection/__init__.py +0 -3
  68. openstef/model_selection/model_selection.py +0 -311
  69. openstef/monitoring/__init__.py +0 -3
  70. openstef/monitoring/performance_meter.py +0 -92
  71. openstef/monitoring/teams.py +0 -203
  72. openstef/pipeline/__init__.py +0 -3
  73. openstef/pipeline/create_basecase_forecast.py +0 -133
  74. openstef/pipeline/create_component_forecast.py +0 -168
  75. openstef/pipeline/create_forecast.py +0 -171
  76. openstef/pipeline/optimize_hyperparameters.py +0 -317
  77. openstef/pipeline/train_create_forecast_backtest.py +0 -163
  78. openstef/pipeline/train_model.py +0 -561
  79. openstef/pipeline/utils.py +0 -52
  80. openstef/postprocessing/__init__.py +0 -3
  81. openstef/postprocessing/postprocessing.py +0 -275
  82. openstef/preprocessing/__init__.py +0 -3
  83. openstef/preprocessing/preprocessing.py +0 -42
  84. openstef/settings.py +0 -15
  85. openstef/tasks/__init__.py +0 -3
  86. openstef/tasks/calculate_kpi.py +0 -324
  87. openstef/tasks/create_basecase_forecast.py +0 -118
  88. openstef/tasks/create_components_forecast.py +0 -162
  89. openstef/tasks/create_forecast.py +0 -145
  90. openstef/tasks/create_solar_forecast.py +0 -420
  91. openstef/tasks/create_wind_forecast.py +0 -80
  92. openstef/tasks/optimize_hyperparameters.py +0 -135
  93. openstef/tasks/split_forecast.py +0 -273
  94. openstef/tasks/train_model.py +0 -224
  95. openstef/tasks/utils/__init__.py +0 -3
  96. openstef/tasks/utils/dependencies.py +0 -107
  97. openstef/tasks/utils/predictionjobloop.py +0 -243
  98. openstef/tasks/utils/taskcontext.py +0 -160
  99. openstef/validation/__init__.py +0 -3
  100. openstef/validation/validation.py +0 -322
  101. openstef-3.4.56.dist-info/METADATA +0 -154
  102. openstef-3.4.56.dist-info/RECORD +0 -102
  103. openstef-3.4.56.dist-info/top_level.txt +0 -1
  104. /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
@@ -1,222 +0,0 @@
1
- # SPDX-FileCopyrightText: 2017-2021 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
- """This module defines the grouped regressor."""
5
- from typing import Any, Callable, Union
6
-
7
- import joblib
8
- import numpy as np
9
- import pandas as pd
10
- from pandas.core.groupby.generic import DataFrameGroupBy
11
- from sklearn.base import BaseEstimator, MetaEstimatorMixin, RegressorMixin, clone
12
- from sklearn.utils.validation import check_is_fitted
13
-
14
-
15
class GroupedRegressor(BaseEstimator, RegressorMixin, MetaEstimatorMixin):
    """Meta-model that fits one clone of the base estimator per group of a groupby on the data.

    The base estimator is a sklearn regressor; the groupby is performed on the
    columns given by ``group_columns``. Fit and predict can be executed in
    parallel over the group keys with joblib.

    Example:

        .. code-block:: md

            data = | index | group | x0 | x1 | x3 | y |
                   |   0   |   1   | .. | .. | .. | . |
                   |   1   |   2   | .. | .. | .. | . |
                   |   2   |   1   | .. | .. | .. | . |
                   |   3   |   2   | .. | .. | .. | . |

                                   [      X     ][ Y ]

        The GroupedRegressor on the data with group_columns='group' fits 2 models:
            - Model 1 with rows 0 and 2, columns x0, x1 and x3 as the features and column y as the target.
            - Model 2 with rows 1 and 3, columns x0, x1 and x3 as the features and column y as the target.

    Args:
        base_estimator: Regressor that is cloned and fitted once per group.

        group_columns: Name(s) of the column(s) used as the key for the groupby operation.
            A single ``int`` or ``str`` is wrapped into a one-element list.

        n_jobs: default=1
            The maximum number of concurrently running jobs,
            such as the number of Python worker processes when backend="multiprocessing"
            or the size of the thread-pool when backend="threading".

    Attributes:
        feature_names_: All input features (without group_columns).

        estimators_:
            Dictionary that stores the fitted estimator of each group.
            The keys are the group keys and the values are the regressors fitted on the grouped data.

    """

    def __init__(
        self,
        base_estimator: RegressorMixin,
        group_columns: Union[str, int, list[str], list[int]],
        n_jobs: int = 1,
    ):
        """Initialize meta model."""
        self.base_estimator = base_estimator
        # A single column name is normalised to a list so the rest of the class
        # can always iterate over ``self.group_columns``.
        if isinstance(group_columns, (int, str)):
            self.group_columns = [group_columns]
        else:
            self.group_columns = group_columns
        self.n_jobs = n_jobs

    def _get_tags(self):
        """Return the estimator tags of the wrapped base estimator."""
        return self.base_estimator._get_tags()

    def _check_group_columns(self, df):
        """Validate that ``group_columns`` is a list and that every entry is a column of ``df``.

        Raises:
            ValueError: If ``group_columns`` is not a list or a group column is missing in ``df``.

        """
        if not isinstance(self.group_columns, list):
            raise ValueError(
                "The group columns parameter should be a list, it gets a {}".format(
                    type(self.group_columns)
                )
            )
        available_columns = list(df.columns)
        for c in self.group_columns:
            if c not in available_columns:
                raise ValueError("The group column {} is missing!".format(c))

    def _group_mask(self, df: pd.DataFrame, group: Any) -> np.ndarray:
        """Return a boolean row mask selecting the rows of ``df`` that belong to ``group``.

        ``group`` may be a scalar or a tuple with one value per group column.
        The comparison is reduced per row so the mask always has one entry per
        row, also when several group columns are used.
        """
        return (df[self.group_columns] == group).all(axis=1).to_numpy()

    def _partial_fit(
        self, group: Any, df_group: pd.DataFrame, eval_set=None, **kwargs
    ) -> tuple[Any, BaseEstimator]:
        """Fit a fresh clone of the base estimator on the data of a single group.

        Args:
            group: Key of the group being fitted.
            df_group: Rows of this group, containing the feature columns and ``__target__``.
            eval_set: Optional list of DataFrames (features, group columns and ``__target__``);
                each one is filtered down to the rows of ``group`` before it is passed on.
            **kwargs: Extra keyword arguments forwarded to the estimator's ``fit``.

        Returns:
            The group key together with the fitted estimator.

        """
        estimator = clone(self.base_estimator)
        X = df_group.loc[:, self.feature_names_]
        y = df_group.loc[:, "__target__"]

        if eval_set is None:
            estimator_fitted = estimator.fit(X, y, **kwargs)
        else:
            group_eval_set = []
            for df in eval_set:
                mask = self._group_mask(df, group)
                group_eval_set.append(
                    (df.loc[mask, self.feature_names_], df.loc[mask, "__target__"])
                )
            estimator_fitted = estimator.fit(
                X, y, eval_set=group_eval_set, **kwargs
            )

        return (group, estimator_fitted)

    def _partial_predict(self, group, df_group, **kwargs):
        """Predict for the rows of one group with the estimator fitted for that group."""
        return self.estimators_[group].predict(df_group, **kwargs)

    @classmethod
    def grouped_compute(
        cls,
        df: pd.DataFrame,
        group_columns: Union[list[str], list[int]],
        func: Callable[[tuple, pd.DataFrame], np.ndarray],
        n_jobs: int = 1,
        eval_set=None,
    ) -> tuple[tuple[np.ndarray, ...], DataFrameGroupBy, pd.DataFrame]:
        """Computes the specified function on each group defined by the grouping columns.

        It is a utility function used to perform fit and predict on each group.
        The df_res is an empty dataframe, indexed like the reset input, in which
        the caller can aggregate the results of each group. The group_res holds
        one result per group. The gb is the grouping object.

        Args:
            df: DataFrame containing the input data necessary for the computation.
            group_columns: List of the columns used for the groupby operation.
            func: Function that takes the group key and the corresponding data of this group
                (original index restored, group columns dropped) and performs the computation.
            n_jobs: The maximum number of concurrently running jobs.
            eval_set: Accepted for interface compatibility; not used by this function.

        Returns:
            The results of each group, the grouping object and the (empty) global dataframe of results.
            When ``n_jobs`` is 1 or less the results are a lazy generator that can be consumed once;
            otherwise they are whatever ``joblib.Parallel`` returns.

        """
        index_name = df.index.name or "index"
        df_reset = df.reset_index()

        df_res = pd.DataFrame(index=df_reset.index)

        gb = df_reset.groupby(group_columns)

        # Per group: restore the original index and drop the grouping columns.
        group_inputs = (
            (group, df_group.set_index(index_name).drop(group_columns, axis=1))
            for group, df_group in gb
        )

        if n_jobs > 1:
            # Preferred scaling is at cluster level (e.g. k8s/serverless) instead of process level
            group_res = joblib.Parallel(n_jobs=n_jobs)(
                joblib.delayed(func)(group, group_data)
                for group, group_data in group_inputs
            )
        else:
            group_res = (func(group, group_data) for group, group_data in group_inputs)
        return group_res, gb, df_res

    def _grouped_predict(
        self, df: pd.DataFrame, n_jobs: int = 1, **kwargs
    ) -> np.ndarray:
        """Predict per group and reassemble the predictions in the row order of ``df``."""
        group_res, gb, df_res = self.grouped_compute(
            df,
            self.group_columns,
            lambda group, df_group: self._partial_predict(group, df_group, **kwargs),
            n_jobs,
        )

        # gb.groups maps each group key to the positional index labels of its rows.
        for group_index, result in zip(gb.groups.values(), group_res):
            df_res.loc[group_index, "__result__"] = np.array(result)

        return df_res["__result__"].to_numpy()

    def _grouped_fit(
        self, df: pd.DataFrame, n_jobs: int = 1, eval_set=None, **kwargs
    ) -> dict[Any, BaseEstimator]:
        """Fit one estimator per group and return them keyed by group."""
        group_res, _, _ = self.grouped_compute(
            df,
            self.group_columns,
            lambda group, df_group: self._partial_fit(
                group, df_group, eval_set=eval_set, **kwargs
            ),
            n_jobs,
        )
        return dict(group_res)

    def fit(self, x: np.ndarray, y: np.ndarray, eval_set=None, **kwargs):
        """Fit the model.

        Args:
            x: Input data containing the feature columns and the group columns.
            y: Target values.
            eval_set: Optional iterable of ``(x_set, y_set)`` pairs; each ``x_set`` must
                contain the group columns as well.
            **kwargs: Extra keyword arguments forwarded to each base estimator's ``fit``.

        Returns:
            The fitted meta-model (``self``).

        Raises:
            ValueError: If a group column is missing in ``x`` or in one of the eval sets.

        """
        df = pd.DataFrame(x).copy(deep=True)
        self._check_group_columns(df)

        eval_df = None
        if eval_set is not None:
            eval_df = []
            for x_set, y_set in eval_set:
                # Convert before validating so array-like eval sets are handled
                # the same way as ``x``.
                df_set = pd.DataFrame(x_set).copy(deep=True)
                self._check_group_columns(df_set)
                df_set["__target__"] = y_set
                eval_df.append(df_set)

        self.feature_names_ = [
            c for c in list(df.columns) if c not in self.group_columns
        ]
        df.loc[:, "__target__"] = y
        self.estimators_ = self._grouped_fit(
            df, self.n_jobs, eval_set=eval_df, **kwargs
        )
        return self

    def predict(self, x: np.ndarray, **kwargs) -> np.ndarray:
        """Make a prediction.

        Args:
            x: Input data containing the feature columns and the group columns.
            **kwargs: Extra keyword arguments forwarded to each estimator's ``predict``.

        Returns:
            Predictions in the row order of ``x``.

        Raises:
            ValueError: If a group column is missing in ``x``.

        """
        check_is_fitted(self)
        df = pd.DataFrame(x)
        self._check_group_columns(df)
        return self._grouped_predict(df, self.n_jobs, **kwargs)
@@ -1,138 +0,0 @@
1
- # SPDX-FileCopyrightText: 2017-2023 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
- """This module defines the missing value handler."""
5
- from typing import Union
6
-
7
- import numpy as np
8
- import pandas as pd
9
- from sklearn.base import BaseEstimator, MetaEstimatorMixin, RegressorMixin, clone
10
- from sklearn.impute import SimpleImputer
11
- from sklearn.pipeline import Pipeline
12
- from sklearn.preprocessing import FunctionTransformer
13
- from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
14
-
15
-
16
class MissingValuesHandler(BaseEstimator, RegressorMixin, MetaEstimatorMixin):
    """Class for a meta-model that handles missing values and removes columns filled exclusively by NaN.

    It's a pipeline of:

    - An Imputation transformer for completing missing values.
    - A Regressor fitted on the filled data.

    Args:
        base_estimator: Regressor used in the pipeline.
        missing_values: The placeholder for the missing values. All occurrences of
            `missing_values` will be imputed. For pandas' dataframes with
            nullable integer dtypes with missing values, `missing_values`
            should be set to `np.nan`, since `pd.NA` will be converted to `np.nan`.
        imputation_strategy: The imputation strategy.
            - If None no imputation is performed.
            - If "mean", then replace missing values using the mean along
              each column. Can only be used with numeric data.
            - If "median", then replace missing values using the median along
              each column. Can only be used with numeric data.
            - If "most_frequent", then replace missing using the most frequent
              value along each column. Can be used with strings or numeric data.
              If there is more than one such value, only the smallest is returned.
            - If "constant", then replace missing values with fill_value. Can be
              used with strings or numeric data.
        fill_value: When strategy == "constant", fill_value is used to replace all
            occurrences of missing_values.
            If left to the default, fill_value will be 0 when imputing numerical
            data and "missing_value" for strings or object data types.

    Attributes:
        feature_in_names_:
            All input features (column labels seen during fit).

        non_null_columns_:
            Valid features used by the regressor.

        n_features_in_:
            Number of input features.

        regressor_: RegressorMixin
            Regressor fitted on valid columns.

        imputer_: SimpleImputer
            Imputer for missing values fitted on valid columns
            (an identity FunctionTransformer when no strategy is set).

        pipeline_: Pipeline
            Pipeline that chains the imputer and the regressor.

        feature_importances_: ndarray (n_features_in_, )
            The feature importances from the regressor for valid features and zero otherwise.
            NOTE(review): this attribute is not assigned anywhere in this class; confirm
            where it is expected to come from before relying on it.

    """

    def __init__(
        self,
        base_estimator: RegressorMixin,
        missing_values: Union[int, float, str, None] = np.nan,
        imputation_strategy: Union[str, None] = None,
        fill_value: Union[str, int, float, None] = None,
    ):
        """Initialize missing values handler."""
        self.base_estimator = base_estimator
        self.missing_values = missing_values
        self.imputation_strategy = imputation_strategy
        self.fill_value = fill_value

    def _get_tags(self):
        """Return the base estimator's tags, adjusted for this wrapper."""
        tags = self.base_estimator._get_tags()
        tags["requires_y"] = True
        tags["multioutput"] = False
        # NaN input is only acceptable when an imputation strategy is configured.
        tags["allow_nan"] = self.imputation_strategy is not None
        return tags

    def fit(self, x, y):
        """Fit model.

        Columns of ``x`` that contain only nulls are excluded; the imputer and
        a clone of the base estimator are then fitted on the remaining columns.

        Args:
            x: Input features (DataFrame or array-like); NaN values are allowed.
            y: Numeric target values.

        Returns:
            The fitted handler (``self``).

        """
        # Only the validated y is kept; x is handled as a DataFrame below.
        _, y = check_X_y(x, y, force_all_finite="allow-nan", y_numeric=True)
        if not isinstance(x, pd.DataFrame):
            x = pd.DataFrame(np.asarray(x))
        self.feature_in_names_ = list(x.columns)
        self.n_features_in_ = x.shape[1]

        # Remove always null columns
        is_column_null = x.isnull().all(axis="index")
        self.non_null_columns_ = list(x.columns[~is_column_null])

        self.regressor_ = clone(self.base_estimator)

        # Build the proper imputation transformer
        # - Identity function if strategy is None
        # - SimpleImputer with the dedicated strategy
        if self.imputation_strategy is None:
            self.imputer_ = FunctionTransformer(func=self._identity)
        else:
            self.imputer_ = SimpleImputer(
                missing_values=self.missing_values,
                strategy=self.imputation_strategy,
                fill_value=self.fill_value,
            )

        self.pipeline_ = Pipeline(
            [("imputer", self.imputer_), ("regressor", self.regressor_)]
        )

        # Fit only on non_null_columns
        self.pipeline_.fit(x[self.non_null_columns_], y)

        return self

    @classmethod
    def _identity(cls, x):
        """Return the input unchanged (used when no imputation is requested)."""
        return x

    def predict(self, x):
        """Make a prediction.

        Args:
            x: Input features (DataFrame or array-like); NaN values are allowed.

        Returns:
            Predictions of the fitted pipeline on the columns kept during fit.

        """
        check_is_fitted(self)
        check_array(
            x,
            force_all_finite="allow-nan",
        )
        if not isinstance(x, pd.DataFrame):
            x = pd.DataFrame(np.array(x))
            # Array input carries no labels: restore the labels seen during fit
            # so the selection below also works when fit received a DataFrame
            # with named columns.
            if x.shape[1] == self.n_features_in_:
                x.columns = self.feature_in_names_
        return self.pipeline_.predict(x[self.non_null_columns_])
@@ -1,214 +0,0 @@
1
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
- import logging
5
- from typing import Union
6
-
7
- import structlog
8
-
9
- from openstef.enums import ModelType
10
- from openstef.model.regressors.arima import ARIMAOpenstfRegressor
11
- from openstef.model.regressors.custom_regressor import is_custom_type, load_custom_model
12
- from openstef.model.regressors.gblinear_quantile import GBLinearQuantileOpenstfRegressor
13
- from openstef.model.regressors.lgbm import LGBMOpenstfRegressor
14
- from openstef.model.regressors.linear import LinearOpenstfRegressor
15
- from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor
16
- from openstef.model.regressors.regressor import OpenstfRegressor
17
- from openstef.model.regressors.flatliner import FlatlinerRegressor
18
- from openstef.model.regressors.xgb import XGBOpenstfRegressor
19
- from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor
20
- from openstef.model.regressors.xgb_multioutput_quantile import (
21
- XGBMultiOutputQuantileOpenstfRegressor,
22
- )
23
- from openstef.settings import Settings
24
-
25
# Filter structlog output at the log level configured in the project settings,
# then create the module-level logger.
_bound_logger_class = structlog.make_filtering_bound_logger(
    logging.getLevelName(Settings.log_level)
)
structlog.configure(wrapper_class=_bound_logger_class)
logger = structlog.get_logger(__name__)

# Keyword groups that occur in the same order for several model types.
_XGB_QUANTILE_KWARGS = [
    "quantiles",
    "gamma",
    "colsample_bytree",
    "subsample",
    "min_child_weight",
    "max_depth",
    "early_stopping_rounds",
]
_IMPUTATION_KWARGS = [
    "missing_values",
    "imputation_strategy",
    "fill_value",
]
_WEIGHTING_KWARGS = [
    "weight_scale_percentile",
    "weight_exponent",
    "weight_floor",
    "no_fill_future_values_features",
]

# Per model type: the keyword arguments that are passed on to the model
# constructor. Every entry is its own list object.
valid_model_kwargs = {
    ModelType.XGB: [
        "n_estimators",
        "objective",
        "max_depth",
        "learning_rate",
        "verbosity",
        "booster",
        "tree_method",
        "gamma",
        "min_child_weight",
        "max_delta_step",
        "subsample",
        "colsample_bytree",
        "colsample_bylevel",
        "colsample_bynode",
        "reg_alpha",
        "reg_lambda",
        "scale_pos_weight",
        "base_score",
        "missing",
        "num_parallel_tree",
        "kwargs",
        "random_state",
        "n_jobs",
        "monotone_constraints",
        "interaction_constraints",
        "importance_type",
        "gpu_id",
        "validate_parameters",
        "early_stopping_rounds",
    ],
    ModelType.LGB: [
        "boosting_type",
        "objective",
        "num_leaves",
        "max_depth",
        "learning_rate",
        "n_estimators",
        "subsample_for_bin",
        "min_split_gain",
        "min_child_weight",
        "min_child_samples",
        "subsample",
        "subsample_freq",
        "colsample_bytree",
        "reg_alpha",
        "reg_lambda",
        "random_state",
        "n_jobs",
        "silent",
        "importance_type",
        "early_stopping_rounds",
    ],
    ModelType.XGB_QUANTILE: list(_XGB_QUANTILE_KWARGS),
    ModelType.XGB_MULTIOUTPUT_QUANTILE: [
        *_XGB_QUANTILE_KWARGS,
        "arctan_smoothing",
    ],
    ModelType.LINEAR: list(_IMPUTATION_KWARGS),
    ModelType.FLATLINER: [
        "quantiles",
    ],
    ModelType.LINEAR_QUANTILE: [
        "alpha",
        "quantiles",
        "solver",
        *_IMPUTATION_KWARGS,
        *_WEIGHTING_KWARGS,
    ],
    ModelType.GBLINEAR_QUANTILE: [
        "quantiles",
        *_IMPUTATION_KWARGS,
        *_WEIGHTING_KWARGS,
        "clipped_features",
        "learning_rate",
        "num_boost_round",
        "early_stopping_rounds",
        "reg_alpha",
        "reg_lambda",
        "updater",
        "feature_selector",
        "top_k",
    ],
    ModelType.ARIMA: [
        "backtest_max_horizon",
        "order",
        "seasonal_order",
        "trend",
    ],
}
151
-
152
-
153
class ModelCreator:
    """Factory that builds machine learning models from a model type."""

    # Lookup table from model type to the regressor class that implements it.
    MODEL_CONSTRUCTORS = {
        ModelType.XGB: XGBOpenstfRegressor,
        ModelType.LGB: LGBMOpenstfRegressor,
        ModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
        ModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
        ModelType.LINEAR: LinearOpenstfRegressor,
        ModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
        ModelType.GBLINEAR_QUANTILE: GBLinearQuantileOpenstfRegressor,
        ModelType.ARIMA: ARIMAOpenstfRegressor,
        ModelType.FLATLINER: FlatlinerRegressor,
    }

    @staticmethod
    def create_model(model_type: Union[ModelType, str], **kwargs) -> OpenstfRegressor:
        """Instantiate the model that belongs to ``model_type``.

        Built-in types are resolved through ``MODEL_CONSTRUCTORS`` and
        ``valid_model_kwargs``; custom types are resolved through
        ``load_custom_model`` and the loaded class' ``valid_kwargs()``.
        Keyword arguments that are not listed as valid for the resolved
        model are dropped before the constructor is called.

        Args:
            model_type: Model type to construct.
            kwargs: Optional keyword arguments to pass to the model.

        Raises:
            NotImplementedError: When resolving ``model_type`` raises a ValueError
                (e.g. an invalid model_type string).
            ImportError: When the resolved constructor is ``None``.

        Returns:
            OpenSTEF model

        """
        try:
            # ModelType(...) raises a ValueError for an invalid model_type str
            # and is a no-op for a value that already is a ModelType member.
            if not is_custom_type(model_type):
                model_type = ModelType(model_type)
                constructor = ModelCreator.MODEL_CONSTRUCTORS[model_type]
                accepted_kwargs = valid_model_kwargs[model_type]
            else:
                constructor = load_custom_model(model_type)
                accepted_kwargs = constructor.valid_kwargs()
            # Check if the model was imported.
            # NOTE(review): the lookups above would already fail for a missing
            # or None constructor, so this guard looks unreachable as written;
            # confirm the intended behaviour for optional dependencies.
            if constructor is None:
                raise ImportError(
                    f"Constructor not available for '{model_type}'. "
                    "Perhaps you forgot to install an optional dependency? "
                    "Please refer to the ReadMe for instructions"
                )
        except ValueError as err:
            valid_types = [t.value for t in ModelType]
            raise NotImplementedError(
                f"No constructor for '{model_type}', "
                f"valid model_types are: {valid_types} "
                "or import a custom model"
            ) from err

        # Only pass relevant arguments to the model constructor to prevent warnings.
        model_kwargs = {}
        for name, value in kwargs.items():
            if name in accepted_kwargs:
                model_kwargs[name] = value

        return constructor(**model_kwargs)