PyPI - openstef - Versions diffs - 3.4.9__py3-none-any.whl → 3.4.29__py3-none-any.whl - Mend

openstef 3.4.9__py3-none-any.whl → 3.4.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

openstef/app_settings.py +19 -0
openstef/data_classes/data_prep.py +1 -1
openstef/data_classes/prediction_job.py +12 -8
openstef/enums.py +3 -7
openstef/exceptions.py +1 -1
openstef/feature_engineering/apply_features.py +0 -6
openstef/feature_engineering/data_preparation.py +12 -5
openstef/feature_engineering/feature_applicator.py +1 -5
openstef/feature_engineering/general.py +14 -0
openstef/feature_engineering/lag_features.py +1 -1
openstef/feature_engineering/missing_values_transformer.py +99 -0
openstef/feature_engineering/weather_features.py +7 -0
openstef/metrics/figure.py +3 -0
openstef/metrics/metrics.py +58 -1
openstef/metrics/reporter.py +7 -0
openstef/model/confidence_interval_applicator.py +28 -3
openstef/model/model_creator.py +36 -27
openstef/model/objective.py +11 -28
openstef/model/objective_creator.py +4 -3
openstef/model/regressors/arima.py +1 -1
openstef/model/regressors/dazls.py +35 -96
openstef/model/regressors/flatliner.py +100 -0
openstef/model/regressors/linear_quantile.py +247 -0
openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
openstef/model/regressors/xgb_quantile.py +3 -0
openstef/model/serializer.py +10 -0
openstef/model/standard_deviation_generator.py +3 -2
openstef/model_selection/model_selection.py +3 -0
openstef/monitoring/performance_meter.py +1 -2
openstef/monitoring/teams.py +11 -0
openstef/pipeline/create_basecase_forecast.py +11 -1
openstef/pipeline/create_component_forecast.py +11 -22
openstef/pipeline/create_forecast.py +20 -1
openstef/pipeline/optimize_hyperparameters.py +18 -16
openstef/pipeline/train_create_forecast_backtest.py +11 -1
openstef/pipeline/train_model.py +23 -7
openstef/pipeline/utils.py +3 -0
openstef/postprocessing/postprocessing.py +29 -0
openstef/settings.py +15 -0
openstef/tasks/calculate_kpi.py +20 -17
openstef/tasks/create_basecase_forecast.py +13 -5
openstef/tasks/create_components_forecast.py +20 -4
openstef/tasks/create_forecast.py +5 -2
openstef/tasks/split_forecast.py +7 -0
openstef/tasks/train_model.py +7 -5
openstef/tasks/utils/taskcontext.py +7 -0
openstef/validation/validation.py +27 -2
{openstef-3.4.9.dist-info → openstef-3.4.29.dist-info}/METADATA +34 -38
openstef-3.4.29.dist-info/RECORD +91 -0
{openstef-3.4.9.dist-info → openstef-3.4.29.dist-info}/WHEEL +1 -1
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license +0 -3
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license +0 -3
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license +0 -3
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
openstef/feature_engineering/historic_features.py +0 -40
openstef/model/regressors/proloaf.py +0 -281
openstef/tasks/run_tracy.py +0 -145
openstef-3.4.9.dist-info/RECORD +0 -104
{openstef-3.4.9.dist-info → openstef-3.4.29.dist-info}/LICENSE +0 -0
{openstef-3.4.9.dist-info → openstef-3.4.29.dist-info}/top_level.txt +0 -0

openstef/model/objective.py CHANGED Viewed

@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: MPL-2.0
 import copy
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Any, Callable, Optional
 import optuna
@@ -59,7 +59,7 @@ class RegressorObjective:
         self.validation_data = None
         self.test_data = None
         self.model = model
-        self.start_time = datetime.utcnow()
+        self.start_time = datetime.now(timezone.utc)
         self.test_fraction = test_fraction
         self.validation_fraction = validation_fraction
         self.eval_metric = eval_metric
@@ -94,7 +94,7 @@ class RegressorObjective:
         split_args = self.split_args
         if split_args is None:
             split_args = {
-                "stratification_min_max": self.model_type != MLModelType.ProLoaf,
+                "stratification_min_max": True,
                 "back_test": True,
             }
         (
@@ -349,13 +349,13 @@ class XGBQuantileRegressorObjective(RegressorObjective):
         )
-class ProLoafRegressorObjective(RegressorObjective):
+class XGBMultioutputQuantileRegressorObjective(RegressorObjective):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.model_type = MLModelType.ProLoaf
+        self.model_type = MLModelType.XGB_QUANTILE
     def get_params(self, trial: optuna.trial.FrozenTrial) -> dict:
-        """Get parameters for ProLoaf Regressor Objective with objective specific parameters.
+        """Get parameters for XGB Multioutput Quantile Regressor Objective with objective specific parameters.
         Args: trial
@@ -366,33 +366,16 @@ class ProLoafRegressorObjective(RegressorObjective):
         # Filtered default parameters
         model_params = super().get_params(trial)
-        # ProLoaf specific parameters
+        # XGB specific parameters
         params = {
-            # TODO: look into optimizing this pipeline for proloaf
-            # "relu_leak": trial.suggest_float("relu_leak", 0.1, 1.0),
-            # "core_layers": trial.suggest_int("core_layers", 1, 3),
-            # "rel_linear_hidden_size": trial.suggest_float(
-            #    "rel_linear_hidden_size", 0.1, 1
-            # ),
-            # "rel_core_hidden_size": trial.suggest_float("rel_core_hidden_size", 0.1, 1),
-            # "dropout_fc": trial.suggest_float("dropout_fc", 0.1, 0.9),
-            # "dropout_core": trial.suggest_float("dropout_core", 0.1, 0.9),
-            # "early_stopping_patience": trial.suggest_int(
-            #    "early_stopping_patience", 5, 10
-            # ),
-            # "early_stopping_margin": trial.suggest_float(
-            #    "early_stopping_margin", 0.1, 0.9
-            # ),
-            "max_epochs": trial.suggest_int(
-                "max_epochs", 1, 1
-            ),  # TODO: change after having availability to gpu resource
-            "batch_size": trial.suggest_int("batch_size", 1, 24),
+            "gamma": trial.suggest_float("gamma", 1e-8, 1.0),
+            "arctan_smoothing": trial.suggest_float("arctan_smoothing", 0.025, 0.15),
         }
         return {**model_params, **params}
     def get_pruning_callback(self, trial: optuna.trial.FrozenTrial):
-        return optuna.integration.PyTorchLightningPruningCallback(
-            trial, monitor="val_loss"
+        return optuna.integration.XGBoostPruningCallback(
+            trial, observation_key=f"validation_1-{self.eval_metric}"
         )

openstef/model/objective_creator.py CHANGED Viewed

@@ -6,13 +6,13 @@ from typing import Union
 from openstef.enums import MLModelType
 from openstef.model.objective import (
+    ARIMARegressorObjective,
     LGBRegressorObjective,
     LinearRegressorObjective,
-    ProLoafRegressorObjective,
     RegressorObjective,
     XGBQuantileRegressorObjective,
     XGBRegressorObjective,
-    ARIMARegressorObjective,
+    XGBMultioutputQuantileRegressorObjective,
 )
 from openstef.model.regressors.custom_regressor import (
     create_custom_objective,
@@ -25,8 +25,9 @@ class ObjectiveCreator:
         MLModelType.XGB: XGBRegressorObjective,
         MLModelType.LGB: LGBRegressorObjective,
         MLModelType.XGB_QUANTILE: XGBQuantileRegressorObjective,
-        MLModelType.ProLoaf: ProLoafRegressorObjective,
+        MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultioutputQuantileRegressorObjective,
         MLModelType.LINEAR: LinearRegressorObjective,
+        MLModelType.LINEAR_QUANTILE: LinearRegressorObjective,
         MLModelType.ARIMA: ARIMARegressorObjective,
     }

openstef/model/regressors/arima.py CHANGED Viewed

@@ -5,9 +5,9 @@
 import numpy as np
 import pandas as pd
 import statsmodels.api as sm
 from sklearn.metrics import r2_score
 from sklearn.model_selection import TimeSeriesSplit
 from openstef.model.regressors.regressor import OpenstfRegressor

openstef/model/regressors/dazls.py CHANGED Viewed

@@ -4,65 +4,41 @@
 """This module defines the DAZL model."""
 import numpy as np
 from sklearn.base import BaseEstimator
+from sklearn.compose import TransformedTargetRegressor
+from sklearn.linear_model import LinearRegression
 from sklearn.metrics import mean_squared_error, r2_score
-from sklearn.neighbors import KNeighborsRegressor
+from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import MinMaxScaler
-from sklearn.utils import shuffle
 class Dazls(BaseEstimator):
     """DAZLS model.
-    The model carries out wind and solar power prediction for unseen target substations using training data from
-    other substations with known components.
-    Any data-driven model can be plugged and used as the base for the domain and the adaptation model.
-    For a full reference, see:
-    Teng, S.Y., van Nooten, C. C., van Doorn, J.M., Ottenbros, A., Huijbregts, M., Jansen, J.J.
-    Improving Near Real-Time Predictions of Renewable Electricity Production at Substation Level (Submitted)
+    The model carries out wind and solar power prediction for unseen target substations using training data from other
+    substations with known components.
     """
+    model_: Pipeline
     def __init__(self):
         """Initialize DAZL model."""
         self.__name__ = "DAZLS"
-        self.domain_model_scaler = MinMaxScaler(clip=True)
-        self.adaptation_model_scaler = MinMaxScaler(clip=True)
-        self.target_scaler = MinMaxScaler(clip=True)
-        self.domain_model = KNeighborsRegressor(n_neighbors=20, weights="uniform")
-        self.adaptation_model = KNeighborsRegressor(n_neighbors=20, weights="uniform")
+        regressor = TransformedTargetRegressor(
+            regressor=LinearRegression(),
+            transformer=MinMaxScaler(clip=True),
+        )
+        self.model_ = Pipeline(
+            [("scaler", MinMaxScaler(clip=True)), ("regressor", regressor)]
+        )
         # The input columns for the domain and adaptation models (with description)
-        self.domain_model_input_columns = [
+        self.baseline_input_columns = [
             "radiation",  # Weather parameter
             "windspeed_100m",  # Weather parameter
-            "total_substation",  # Substation's measured total load
-            "lat",  # Latitude
-            "lon",  # Longitude
-            "solar_on",  # Solar installed on substation: yes=1, no=0
-            "wind_on",  # Wind installed on substation: yes=1, no=0
-            "hour",  # Hour of the day
-            "minute",  # Minute of the hour
-            "var0",  # Variance of the total load
-            "var1",  # Variance of the total pv load (only available for calibration substations)
-            "var2",  # Variance of the total wind load (only available for calibration substations)
-            "sem0",  # Standard Error of the Mean of the total load
-            "sem1",  # Standard Error of the Mean of the total PV load (only available for calibration substations)
-        ]
-        self.adaptation_model_input_columns = [
-            "total_substation",
-            "lat",
-            "lon",
-            "solar_on",
-            "wind_on",
-            "hour",
-            "minute",
-            "var0",
-            "var1",
-            "var2",
-            "sem0",
-            "sem1",
+            "total_load",
         ]
         self.target_columns = ["total_wind_part", "total_solar_part"]
@@ -78,30 +54,12 @@ class Dazls(BaseEstimator):
             target: the expected output (y_train)
         """
-        x, x2, y = (
-            features.loc[:, self.domain_model_input_columns],
-            features.loc[:, self.adaptation_model_input_columns],
+        x, y = (
+            features.loc[:, self.baseline_input_columns],
             target.loc[:, self.target_columns],
         )
-        domain_model_input, adaptation_model_input, y_train = shuffle(
-            x, x2, y, random_state=999
-        )  # just shuffling
-        self.domain_model_scaler.fit(domain_model_input)
-        self.adaptation_model_scaler.fit(adaptation_model_input)
-        self.target_scaler.fit(y_train)
-        domain_model_input = self.domain_model_scaler.transform(domain_model_input)
-        adaptation_model_input = self.adaptation_model_scaler.transform(
-            adaptation_model_input
-        )
-        y_train = self.target_scaler.transform(y_train)
-        self.domain_model.fit(domain_model_input, y_train)
-        domain_model_pred = self.domain_model.predict(domain_model_input)
-        adaptation_model_input = np.concatenate(
-            (adaptation_model_input, domain_model_pred), axis=1
-        )
-        self.adaptation_model.fit(adaptation_model_input, y_train)
+        self.model_.fit(x, y)
     def predict(self, x: np.array):
         """Make a prediction.
@@ -109,37 +67,21 @@ class Dazls(BaseEstimator):
         For the prediction we use the test data x. We use domain_model_input_columns and
         adaptation_model_input_columns to separate x in test data for domain model and adaptation model respectively.
+        There is an option available to return the domain model and adaptation model predictions separately to more
+        easily investigate the effectiveness of the models.
         Args:
             x: domain_model_test_data, adaptation_model_test_data
+            return_sub_preds : a flag value indicating to return the predictions of the domain model and adaptation
+                               model separately. (Default: False.)
+        Returns:
             prediction: The output prediction after both models.
         """
-        domain_model_test_data, adaptation_model_test_data = (
-            x.loc[:, self.domain_model_input_columns],
-            x.loc[:, self.adaptation_model_input_columns],
-        )
-        # Rescale test data for both models (if required)
-        domain_model_test_data_scaled = self.domain_model_scaler.transform(
-            domain_model_test_data
-        )
-        adaptation_model_test_data_scaled = self.adaptation_model_scaler.transform(
-            adaptation_model_test_data
-        )
-        # Use the scaled data to make domain_model_prediction
-        domain_model_test_data_pred = self.domain_model.predict(
-            domain_model_test_data_scaled
-        )
-        # Use the domain_model_prediction to make adaptation_model_prediction
-        adaptation_model_test_data_pred = self.adaptation_model.predict(
-            np.concatenate(
-                [adaptation_model_test_data_scaled, domain_model_test_data_pred], axis=1
-            )
-        )
-        # Rescale adaptation_model_prediction (if required)
-        prediction = self.target_scaler.inverse_transform(
-            adaptation_model_test_data_pred
-        )
-        return prediction
+        model_test_data = x.loc[:, self.baseline_input_columns]
+        return self.model_.predict(model_test_data)
     def score(self, truth, prediction):
         """Evaluation of the prediction's output.
@@ -165,13 +107,10 @@ class Dazls(BaseEstimator):
         """
         summary_str = (
             f"{self.__name__} model summary:\n\n"
-            f"Domain Model: {self.domain_model} \n"
-            f"\tInput columns: {self.domain_model_input_columns} \n"
-            f"\tScaler: {self.domain_model_scaler} \n\n"
-            f"Adaptation Model: {self.adaptation_model} \n"
-            f"\tInput columns: {self.adaptation_model_input_columns} \n"
-            f"\tScaler: {self.adaptation_model_scaler} \n\n"
-            f"Target columns: {self.target_columns}"
+            f"Model: {self.model_} \n"
+            f"\tInput columns: {self.baseline_input_columns} \n"
+            f"\tScaler: {self.model_['scaler']} \n\n"
+            f"\tRegressor: {self.model_['regressor']} \n\n"
         )
         return summary_str

openstef/model/regressors/flatliner.py ADDED Viewed

@@ -0,0 +1,100 @@
+# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
+#
+# SPDX-License-Identifier: MPL-2.0
+import re
+from typing import Dict, Union, Set, Optional, List
+import numpy as np
+import pandas as pd
+from sklearn.base import RegressorMixin
+from sklearn.linear_model import QuantileRegressor
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.utils.validation import check_is_fitted
+from openstef.feature_engineering.missing_values_transformer import (
+    MissingValuesTransformer,
+)
+from openstef.model.regressors.regressor import OpenstfRegressor
+class FlatlinerRegressor(OpenstfRegressor, RegressorMixin):
+    feature_names_: List[str] = []
+    def __init__(self, quantiles=None):
+        """Initialize FlatlinerRegressor.
+        The model always predicts 0.0, regardless of the input features. The model is
+        meant to be used for flatliner locations that still expect a prediction while
+        preserving the prediction interface.
+        """
+        super().__init__()
+        self.quantiles = quantiles
+    @property
+    def feature_names(self) -> list:
+        """The names of the features used to train the model."""
+        check_is_fitted(self)
+        return self.feature_names_
+    @staticmethod
+    def _get_importance_names():
+        return {
+            "gain_importance_name": "total_gain",
+            "weight_importance_name": "weight",
+        }
+    @property
+    def can_predict_quantiles(self) -> bool:
+        """Attribute that indicates if the model predict particular quantiles."""
+        return True
+    def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
+        """Fits flatliner model.
+        Args:
+            x: Feature matrix
+            y: Labels
+        Returns:
+            Fitted LinearQuantile model
+        """
+        self.feature_names_ = list(x.columns)
+        self.feature_importances_ = np.ones(len(self.feature_names_)) / (
+            len(self.feature_names_) or 1.0
+        )
+        return self
+    def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
+        """Makes a prediction for a desired quantile.
+        Args:
+            x: Feature matrix
+            quantile: Quantile for which a prediciton is desired,
+                note that only quantile are available for which a model is trained,
+                and that this is a quantile-model specific keyword
+        Returns:
+            Prediction
+        Raises:
+            ValueError in case no model is trained for the requested quantile
+        """
+        check_is_fitted(self)
+        return np.zeros(x.shape[0])
+    def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array:
+        check_is_fitted(self)
+        return np.array([0.0 for _ in self.feature_names_])
+    @classmethod
+    def _get_param_names(cls):
+        return [
+            "quantiles",
+        ]
+    def __sklearn_is_fitted__(self) -> bool:
+        return True

openstef/model/regressors/linear_quantile.py ADDED Viewed

@@ -0,0 +1,247 @@
+# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
+#
+# SPDX-License-Identifier: MPL-2.0
+import re
+from typing import Dict, Union, Set, Optional
+import numpy as np
+import pandas as pd
+from sklearn.base import RegressorMixin
+from sklearn.linear_model import QuantileRegressor
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.utils.validation import check_is_fitted
+from openstef.feature_engineering.missing_values_transformer import (
+    MissingValuesTransformer,
+)
+from openstef.model.regressors.regressor import OpenstfRegressor
+DEFAULT_QUANTILES: tuple[float, ...] = (0.9, 0.5, 0.1)
+class LinearQuantileOpenstfRegressor(OpenstfRegressor, RegressorMixin):
+    quantiles: tuple[float, ...]
+    alpha: float
+    solver: str
+    imputer_: MissingValuesTransformer
+    x_scaler_: MinMaxScaler
+    y_scaler_: MinMaxScaler
+    models_: Dict[float, QuantileRegressor]
+    is_fitted_: bool = False
+    FEATURE_IGNORE_LIST: Set[str] = {
+        "IsWeekendDay",
+        "IsWeekDay",
+        "IsSunday",
+        "Month",
+        "Quarter",
+    }
+    def __init__(
+        self,
+        quantiles: tuple[float, ...] = DEFAULT_QUANTILES,
+        alpha: float = 0.0,
+        solver: str = "highs",
+        missing_values: Union[int, float, str, None] = np.nan,
+        imputation_strategy: Optional[str] = "mean",
+        fill_value: Union[str, int, float] = None,
+    ):
+        """Initialize LinearQuantileOpenstfRegressor.
+        Model that provides quantile regression with SKLearn QuantileRegressor.
+        For each desired quantile an QuantileRegressor model is trained,
+        these can later be used to predict quantiles.
+        This model is sensitive to feature quality and therefore has logic to remove
+        some custom features produced by OpenSTEF. The features that are removed are:
+        - Holiday features (is_christmas, is_*)
+        - Lagged features (T-1d, T-*)
+        - Point in time features (IsWeekendDay, IsWeekDay, IsSunday, Month, Quarter)
+        - Infeed MFFBAS profiles (E*_I)
+        Args:
+            quantiles: Tuple with desired quantiles, quantile 0.5 is required.
+                For example: (0.1, 0.5, 0.9)
+            alpha: Regularization constant for L1 regularization
+            solver: Solver to use for optimization
+            missing_values: Value to be considered as missing value
+            imputation_strategy: Imputation strategy
+            fill_value: Fill value
+        """
+        super().__init__()
+        # Check if quantile 0.5 is present. This is required.
+        if 0.5 not in quantiles:
+            raise ValueError(
+                "Cannot train quantile model as 0.5 is not in requested quantiles!"
+            )
+        self.quantiles = quantiles
+        self.alpha = alpha
+        self.solver = solver
+        self.imputer_ = MissingValuesTransformer(
+            missing_values=missing_values,
+            imputation_strategy=imputation_strategy,
+            fill_value=fill_value,
+        )
+        self.x_scaler_ = MinMaxScaler(feature_range=(-1, 1))
+        self.y_scaler_ = MinMaxScaler(feature_range=(-1, 1))
+        self.models_ = {
+            quantile: QuantileRegressor(alpha=alpha, quantile=quantile, solver=solver)
+            for quantile in quantiles
+        }
+    @property
+    def feature_names(self) -> list:
+        """The names of the features used to train the model."""
+        check_is_fitted(self)
+        return self.imputer_.non_null_feature_names
+    @staticmethod
+    def _get_importance_names():
+        return {
+            "gain_importance_name": "total_gain",
+            "weight_importance_name": "weight",
+        }
+    @property
+    def can_predict_quantiles(self) -> bool:
+        """Attribute that indicates if the model predict particular quantiles."""
+        return True
+    def _is_feature_ignored(self, feature_name: str) -> bool:
+        """Check if a feature is ignored by the model.
+        Args:
+            feature_name: Feature name
+        Returns:
+            True if the feature is ignored, False otherwise
+        """
+        return (
+            # Ignore named features
+            feature_name in self.FEATURE_IGNORE_LIST
+            or
+            # Ignore holiday features
+            re.match(r"is_", feature_name) is not None
+            or
+            # Ignore lag features
+            re.match(r"T-", feature_name) is not None
+            or
+            # Ignore infeed MFFBAS profiles
+            re.match(r"E\d.*_I", feature_name) is not None
+        )
+    def _remove_ignored_features(self, x: pd.DataFrame) -> pd.DataFrame:
+        """Remove ignored features from the input data.
+        Args:
+            x: Input data
+        Returns:
+            Data without ignored features
+        """
+        return x.drop(columns=[c for c in x.columns if self._is_feature_ignored(c)])
+    def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
+        """Fits linear quantile model.
+        Args:
+            x: Feature matrix
+            y: Labels
+        Returns:
+            Fitted LinearQuantile model
+        """
+        if not isinstance(y, pd.Series):
+            y = pd.Series(np.asarray(y), name="load")
+        x = self._remove_ignored_features(x)
+        # Fix nan columns
+        x = self.imputer_.fit_transform(x)
+        if x.isna().any().any():
+            raise ValueError(
+                "There are nan values in the input data. Set "
+                "imputation_strategy to solve them."
+            )
+        # Apply feature scaling
+        x_scaled = self.x_scaler_.fit_transform(x)
+        y_scaled = self.y_scaler_.fit_transform(y.to_frame())[:, 0]
+        # Add more focus on extreme / peak values
+        sample_weight = np.abs(y_scaled)
+        # Fit quantile regressors
+        for quantile in self.quantiles:
+            self.models_[quantile].fit(
+                X=x_scaled, y=y_scaled, sample_weight=sample_weight
+            )
+        self.is_fitted_ = True
+        self.feature_importances_ = self._get_feature_importance_from_linear()
+        return self
+    def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
+        """Makes a prediction for a desired quantile.
+        Args:
+            x: Feature matrix
+            quantile: Quantile for which a prediciton is desired,
+                note that only quantile are available for which a model is trained,
+                and that this is a quantile-model specific keyword
+        Returns:
+            Prediction
+        Raises:
+            ValueError in case no model is trained for the requested quantile
+        """
+        check_is_fitted(self)
+        # Preprocess input data
+        x = self._remove_ignored_features(x)
+        x = self.imputer_.transform(x)
+        x_scaled = self.x_scaler_.transform(x)
+        # Make prediction
+        y_pred = self.models_[quantile].predict(X=x_scaled)
+        # Inverse scaling
+        y_pred = self.y_scaler_.inverse_transform(y_pred.reshape(-1, 1))[:, 0]
+        return y_pred
+    def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array:
+        check_is_fitted(self)
+        feature_importance_linear = np.abs(self.models_[quantile].coef_)
+        reg_feature_importances_dict = dict(
+            zip(self.imputer_.non_null_feature_names, feature_importance_linear)
+        )
+        return np.array(
+            [
+                reg_feature_importances_dict.get(c, 0)
+                for c in self.imputer_.in_feature_names
+            ]
+        )
+    @classmethod
+    def _get_param_names(cls):
+        return [
+            "quantiles",
+            "alpha",
+            "solver",
+        ]
+    def __sklearn_is_fitted__(self) -> bool:
+        return self.is_fitted_

openstef 3.4.9__py3-none-any.whl → 3.4.29__py3-none-any.whl

openstef 3.4.9__py3-none-any.whl → 3.4.29__py3-none-any.whl