openstef 3.4.10__py3-none-any.whl → 3.4.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef/app_settings.py +19 -0
- openstef/data_classes/data_prep.py +1 -1
- openstef/data_classes/prediction_job.py +12 -8
- openstef/enums.py +3 -7
- openstef/exceptions.py +1 -1
- openstef/feature_engineering/apply_features.py +0 -6
- openstef/feature_engineering/data_preparation.py +12 -5
- openstef/feature_engineering/feature_applicator.py +1 -5
- openstef/feature_engineering/general.py +14 -0
- openstef/feature_engineering/missing_values_transformer.py +99 -0
- openstef/feature_engineering/weather_features.py +7 -0
- openstef/metrics/figure.py +3 -0
- openstef/metrics/metrics.py +58 -1
- openstef/metrics/reporter.py +7 -0
- openstef/model/confidence_interval_applicator.py +28 -3
- openstef/model/model_creator.py +36 -27
- openstef/model/objective.py +11 -28
- openstef/model/objective_creator.py +4 -3
- openstef/model/regressors/arima.py +1 -1
- openstef/model/regressors/dazls.py +35 -96
- openstef/model/regressors/flatliner.py +100 -0
- openstef/model/regressors/linear_quantile.py +247 -0
- openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
- openstef/model/regressors/xgb_quantile.py +3 -0
- openstef/model/serializer.py +10 -0
- openstef/model_selection/model_selection.py +3 -0
- openstef/monitoring/performance_meter.py +1 -2
- openstef/monitoring/teams.py +11 -0
- openstef/pipeline/create_basecase_forecast.py +11 -1
- openstef/pipeline/create_component_forecast.py +11 -22
- openstef/pipeline/create_forecast.py +20 -1
- openstef/pipeline/optimize_hyperparameters.py +18 -16
- openstef/pipeline/train_create_forecast_backtest.py +11 -1
- openstef/pipeline/train_model.py +23 -7
- openstef/pipeline/utils.py +3 -0
- openstef/postprocessing/postprocessing.py +29 -0
- openstef/settings.py +15 -0
- openstef/tasks/calculate_kpi.py +20 -17
- openstef/tasks/create_basecase_forecast.py +13 -5
- openstef/tasks/create_components_forecast.py +20 -4
- openstef/tasks/create_forecast.py +5 -2
- openstef/tasks/split_forecast.py +7 -0
- openstef/tasks/train_model.py +7 -5
- openstef/tasks/utils/taskcontext.py +7 -0
- openstef/validation/validation.py +27 -2
- {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/METADATA +34 -38
- openstef-3.4.29.dist-info/RECORD +91 -0
- {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/WHEEL +1 -1
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
- openstef/feature_engineering/historic_features.py +0 -40
- openstef/model/regressors/proloaf.py +0 -281
- openstef/tasks/run_tracy.py +0 -145
- openstef-3.4.10.dist-info/RECORD +0 -104
- {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/LICENSE +0 -0
- {openstef-3.4.10.dist-info → openstef-3.4.29.dist-info}/top_level.txt +0 -0
    
        openstef/app_settings.py
    ADDED
    
    | @@ -0,0 +1,19 @@ | |
| 1 | 
            +
            # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
         | 
| 2 | 
            +
            #
         | 
| 3 | 
            +
            # SPDX-License-Identifier: MPL-2.0
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            from pydantic import Field
         | 
| 6 | 
            +
            from pydantic_settings import BaseSettings, SettingsConfigDict
         | 
| 7 | 
            +
             | 
| 8 | 
            +
             | 
| 9 | 
            +
            class AppSettings(BaseSettings):
         | 
| 10 | 
            +
                """Global app settings."""
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                model_config = SettingsConfigDict(
         | 
| 13 | 
            +
                    env_prefix="openstef_", env_file=".env", extra="ignore"
         | 
| 14 | 
            +
                )
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                post_teams_messages: bool = True
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                # Logging settings.
         | 
| 19 | 
            +
                log_level: str = Field("INFO", description="Log level used for logging statements.")
         | 
| @@ -6,9 +6,9 @@ from typing import Optional, Union | |
| 6 6 |  | 
| 7 7 | 
             
            from pydantic.v1 import BaseModel
         | 
| 8 8 |  | 
| 9 | 
            +
            from openstef.data_classes.data_prep import DataPrepDataClass
         | 
| 9 10 | 
             
            from openstef.data_classes.model_specifications import ModelSpecificationDataClass
         | 
| 10 11 | 
             
            from openstef.data_classes.split_function import SplitFuncDataClass
         | 
| 11 | 
            -
            from openstef.data_classes.data_prep import DataPrepDataClass
         | 
| 12 12 | 
             
            from openstef.enums import PipelineType
         | 
| 13 13 |  | 
| 14 14 |  | 
| @@ -25,11 +25,15 @@ class PredictionJobDataClass(BaseModel): | |
| 25 25 | 
             
                    - ``"xgb_quantile"``
         | 
| 26 26 | 
             
                    - ``"lgb"``
         | 
| 27 27 | 
             
                    - ``"linear"``
         | 
| 28 | 
            -
                    - ``" | 
| 28 | 
            +
                    - ``"linear_quantile"``
         | 
| 29 | 
            +
                    - ``"xgb_multioutput_quantile"``
         | 
| 30 | 
            +
                    - ``"flatliner"``
         | 
| 29 31 |  | 
| 30 32 | 
             
                If unsure what to pick, choose ``"xgb"``.
         | 
| 31 33 |  | 
| 32 34 | 
             
                """
         | 
| 35 | 
            +
                model_kwargs: Optional[dict]
         | 
| 36 | 
            +
                """The model parameters that should be used."""
         | 
| 33 37 | 
             
                forecast_type: str
         | 
| 34 38 | 
             
                """The type of forecasts that should be made.
         | 
| 35 39 |  | 
| @@ -41,14 +45,14 @@ class PredictionJobDataClass(BaseModel): | |
| 41 45 | 
             
                If unsure what to pick, choose ``"demand"``.
         | 
| 42 46 |  | 
| 43 47 | 
             
                """
         | 
| 44 | 
            -
                horizon_minutes: int = 2880
         | 
| 45 | 
            -
                """The horizon of the desired forecast in minutes. Defaults to 2880 minutes (i.e. 2 days)."""
         | 
| 48 | 
            +
                horizon_minutes: Optional[int] = 2880
         | 
| 49 | 
            +
                """The horizon of the desired forecast in minutes used in tasks. Defaults to 2880 minutes (i.e. 2 days)."""
         | 
| 46 50 | 
             
                resolution_minutes: int
         | 
| 47 51 | 
             
                """The resolution of the desired forecast in minutes."""
         | 
| 48 | 
            -
                lat: float
         | 
| 49 | 
            -
                """Latitude of the forecasted location in degrees."""
         | 
| 50 | 
            -
                lon: float
         | 
| 51 | 
            -
                """Longitude of the forecasted location in degrees."""
         | 
| 52 | 
            +
                lat: Optional[float] = 52.132633
         | 
| 53 | 
            +
                """Latitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derived features and component splitting."""
         | 
| 54 | 
            +
                lon: Optional[float] = 5.291266
         | 
| 55 | 
            +
                """Longitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derived features and component splitting."""
         | 
| 52 56 | 
             
                name: str
         | 
| 53 57 | 
             
                """Name of the forecast, e.g. the location name."""
         | 
| 54 58 | 
             
                train_components: Optional[bool]
         | 
    
        openstef/enums.py
    CHANGED
    
    | @@ -8,10 +8,12 @@ from enum import Enum | |
| 8 8 | 
             
            class MLModelType(Enum):
         | 
| 9 9 | 
             
                XGB = "xgb"
         | 
| 10 10 | 
             
                XGB_QUANTILE = "xgb_quantile"
         | 
| 11 | 
            +
                XGB_MULTIOUTPUT_QUANTILE = "xgb_multioutput_quantile"
         | 
| 11 12 | 
             
                LGB = "lgb"
         | 
| 12 13 | 
             
                LINEAR = "linear"
         | 
| 13 | 
            -
                 | 
| 14 | 
            +
                LINEAR_QUANTILE = "linear_quantile"
         | 
| 14 15 | 
             
                ARIMA = "arima"
         | 
| 16 | 
            +
                FLATLINER = "flatliner"
         | 
| 15 17 |  | 
| 16 18 |  | 
| 17 19 | 
             
            class ForecastType(Enum):
         | 
| @@ -21,12 +23,6 @@ class ForecastType(Enum): | |
| 21 23 | 
             
                BASECASE = "basecase"
         | 
| 22 24 |  | 
| 23 25 |  | 
| 24 | 
            -
            class TracyJobResult(Enum):
         | 
| 25 | 
            -
                SUCCESS = "success"
         | 
| 26 | 
            -
                FAILED = "failed"
         | 
| 27 | 
            -
                UNKNOWN = "unknown"
         | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 26 | 
             
            class PipelineType(Enum):
         | 
| 31 27 | 
             
                FORECAST = "forecast"
         | 
| 32 28 | 
             
                TRAIN = "train"
         | 
    
        openstef/exceptions.py
    CHANGED
    
    
| @@ -14,9 +14,6 @@ Examples of features that are added: | |
| 14 14 | 
             
            import pandas as pd
         | 
| 15 15 |  | 
| 16 16 | 
             
            from openstef.data_classes.prediction_job import PredictionJobDataClass
         | 
| 17 | 
            -
            from openstef.feature_engineering.historic_features import (
         | 
| 18 | 
            -
                add_historic_load_as_a_feature,
         | 
| 19 | 
            -
            )
         | 
| 20 17 | 
             
            from openstef.feature_engineering.holiday_features import (
         | 
| 21 18 | 
             
                generate_holiday_feature_functions,
         | 
| 22 19 | 
             
            )
         | 
| @@ -69,9 +66,6 @@ def apply_features( | |
| 69 66 | 
             
                                        np.random.uniform(0.7,1.7, 200)))
         | 
| 70 67 |  | 
| 71 68 | 
             
                """
         | 
| 72 | 
            -
                # Add if needed the proloaf feature (historic_load)
         | 
| 73 | 
            -
                data = add_historic_load_as_a_feature(data, pj)
         | 
| 74 | 
            -
             | 
| 75 69 | 
             
                # Get lag feature functions
         | 
| 76 70 | 
             
                feature_functions = generate_lag_feature_functions(feature_names, horizon)
         | 
| 77 71 |  | 
| @@ -1,25 +1,27 @@ | |
| 1 1 | 
             
            # SPDX-FileCopyrightText: 2017-2023 Alliander N.V. <korte.termijn.prognoses@alliander.com> # noqa E501>
         | 
| 2 2 | 
             
            #
         | 
| 3 3 | 
             
            # SPDX-License-Identifier: MPL-2.0
         | 
| 4 | 
            -
            import  | 
| 5 | 
            -
             | 
| 4 | 
            +
            import logging
         | 
| 6 5 | 
             
            from abc import ABC, abstractmethod
         | 
| 6 | 
            +
            from datetime import timedelta
         | 
| 7 7 | 
             
            from typing import Optional
         | 
| 8 8 |  | 
| 9 9 | 
             
            import pandas as pd
         | 
| 10 | 
            -
             | 
| 10 | 
            +
            import structlog
         | 
| 11 | 
            +
             | 
| 11 12 | 
             
            from openstef.data_classes.model_specifications import ModelSpecificationDataClass
         | 
| 12 13 | 
             
            from openstef.data_classes.prediction_job import PredictionJobDataClass
         | 
| 13 | 
            -
            from openstef.model.regressors.regressor import OpenstfRegressor
         | 
| 14 14 | 
             
            from openstef.feature_engineering.feature_applicator import (
         | 
| 15 | 
            -
                TrainFeatureApplicator,
         | 
| 16 15 | 
             
                OperationalPredictFeatureApplicator,
         | 
| 16 | 
            +
                TrainFeatureApplicator,
         | 
| 17 17 | 
             
            )
         | 
| 18 18 | 
             
            from openstef.feature_engineering.general import (
         | 
| 19 19 | 
             
                enforce_feature_order,
         | 
| 20 20 | 
             
                remove_non_requested_feature_columns,
         | 
| 21 21 | 
             
            )
         | 
| 22 | 
            +
            from openstef.model.regressors.regressor import OpenstfRegressor
         | 
| 22 23 | 
             
            from openstef.pipeline.utils import generate_forecast_datetime_range
         | 
| 24 | 
            +
            from openstef.settings import Settings
         | 
| 23 25 |  | 
| 24 26 |  | 
| 25 27 | 
             
            class AbstractDataPreparation(ABC):
         | 
| @@ -120,6 +122,11 @@ class ARDataPreparation(AbstractDataPreparation): | |
| 120 122 | 
             
                def prepare_forecast_data(
         | 
| 121 123 | 
             
                    self, data: pd.DataFrame
         | 
| 122 124 | 
             
                ) -> tuple[pd.DataFrame, pd.DataFrame]:
         | 
| 125 | 
            +
                    structlog.configure(
         | 
| 126 | 
            +
                        wrapper_class=structlog.make_filtering_bound_logger(
         | 
| 127 | 
            +
                            logging.getLevelName(Settings.log_level)
         | 
| 128 | 
            +
                        )
         | 
| 129 | 
            +
                    )
         | 
| 123 130 | 
             
                    logger = structlog.get_logger(__name__)
         | 
| 124 131 | 
             
                    self.check_model()
         | 
| 125 132 | 
             
                    # Prep forecast input by selecting only the forecast datetime interval (this is much smaller than the input range)
         | 
| @@ -149,11 +149,7 @@ class TrainFeatureApplicator(AbstractFeatureApplicator): | |
| 149 149 |  | 
| 150 150 | 
             
                    # NOTE this is required since apply_features could add additional features
         | 
| 151 151 | 
             
                    if self.feature_names is not None:
         | 
| 152 | 
            -
                         | 
| 153 | 
            -
                        if pj.get("model") == "proloaf":
         | 
| 154 | 
            -
                            features = self.feature_names + ["historic_load"] + ["horizon"]
         | 
| 155 | 
            -
                        else:
         | 
| 156 | 
            -
                            features = self.feature_names + ["horizon"]
         | 
| 152 | 
            +
                        features = self.feature_names + ["horizon"]
         | 
| 157 153 | 
             
                        result = remove_non_requested_feature_columns(result, features)
         | 
| 158 154 |  | 
| 159 155 | 
             
                    # Sort all features except for the (first) load and (last) horizon columns
         | 
| @@ -3,10 +3,14 @@ | |
| 3 3 | 
             
            # SPDX-License-Identifier: MPL-2.0
         | 
| 4 4 | 
             
            """This module contains various helper functions."""
         | 
| 5 5 |  | 
| 6 | 
            +
            import logging
         | 
| 7 | 
            +
             | 
| 6 8 | 
             
            import numpy as np
         | 
| 7 9 | 
             
            import pandas as pd
         | 
| 8 10 | 
             
            import structlog
         | 
| 9 11 |  | 
| 12 | 
            +
            from openstef.settings import Settings
         | 
| 13 | 
            +
             | 
| 10 14 |  | 
| 11 15 | 
             
            def add_missing_feature_columns(
         | 
| 12 16 | 
             
                input_data: pd.DataFrame, features: list[str]
         | 
| @@ -30,6 +34,11 @@ def add_missing_feature_columns( | |
| 30 34 | 
             
                    Input dataframe with missing columns filled with ``np.nan``.
         | 
| 31 35 |  | 
| 32 36 | 
             
                """
         | 
| 37 | 
            +
                structlog.configure(
         | 
| 38 | 
            +
                    wrapper_class=structlog.make_filtering_bound_logger(
         | 
| 39 | 
            +
                        logging.getLevelName(Settings.log_level)
         | 
| 40 | 
            +
                    )
         | 
| 41 | 
            +
                )
         | 
| 33 42 | 
             
                logger = structlog.get_logger(__name__)
         | 
| 34 43 |  | 
| 35 44 | 
             
                if features is None:
         | 
| @@ -61,6 +70,11 @@ def remove_non_requested_feature_columns( | |
| 61 70 | 
             
                    Model input data with features.
         | 
| 62 71 |  | 
| 63 72 | 
             
                """
         | 
| 73 | 
            +
                structlog.configure(
         | 
| 74 | 
            +
                    wrapper_class=structlog.make_filtering_bound_logger(
         | 
| 75 | 
            +
                        logging.getLevelName(Settings.log_level)
         | 
| 76 | 
            +
                    )
         | 
| 77 | 
            +
                )
         | 
| 64 78 | 
             
                logger = structlog.get_logger(__name__)
         | 
| 65 79 |  | 
| 66 80 | 
             
                if requested_features is None:
         | 
| @@ -0,0 +1,99 @@ | |
| 1 | 
            +
            # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
         | 
| 2 | 
            +
            #
         | 
| 3 | 
            +
            # SPDX-License-Identifier: MPL-2.0
         | 
| 4 | 
            +
            from typing import Union, List, Optional
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            import numpy as np
         | 
| 7 | 
            +
            import pandas as pd
         | 
| 8 | 
            +
            from sklearn.impute import SimpleImputer
         | 
| 9 | 
            +
            from sklearn.preprocessing import FunctionTransformer
         | 
| 10 | 
            +
            from sklearn.utils.validation import check_array
         | 
| 11 | 
            +
             | 
| 12 | 
            +
             | 
| 13 | 
            +
            class MissingValuesTransformer:
         | 
| 14 | 
            +
                """MissingValuesTransformer handles missing values in data by imputing them with a given strategy.
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                It also removes columns that are always null from the data.
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                """
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                in_feature_names: Optional[List[str]] = None
         | 
| 21 | 
            +
                _n_in_features: Optional[int] = None
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                non_null_feature_names: List[str] = None
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                def __init__(
         | 
| 26 | 
            +
                    self,
         | 
| 27 | 
            +
                    missing_values: Union[int, float, str, None] = np.nan,
         | 
| 28 | 
            +
                    imputation_strategy: str = None,
         | 
| 29 | 
            +
                    fill_value: Union[str, int, float] = None,
         | 
| 30 | 
            +
                ):
         | 
| 31 | 
            +
                    """Initialize missing values handler.
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                    Args:
         | 
| 34 | 
            +
                        missing_values: The placeholder for the missing values. All occurrences of
         | 
| 35 | 
            +
                            `missing_values` will be imputed.
         | 
| 36 | 
            +
                        imputation_strategy: The imputation strategy to use
         | 
| 37 | 
            +
                            Can be one of "mean", "median", "most_frequent", "constant" or None.
         | 
| 38 | 
            +
                        fill_value: When strategy == "constant", fill_value is used to replace all
         | 
| 39 | 
            +
                            occurrences of missing_values.
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                    """
         | 
| 42 | 
            +
                    self.missing_values = missing_values
         | 
| 43 | 
            +
                    self.imputation_strategy = imputation_strategy
         | 
| 44 | 
            +
                    self.fill_value = fill_value
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                def fit(self, x, y=None):
         | 
| 47 | 
            +
                    """Fit the imputer on the input data."""
         | 
| 48 | 
            +
                    _ = check_array(x, force_all_finite="allow-nan")
         | 
| 49 | 
            +
                    if not isinstance(x, pd.DataFrame):
         | 
| 50 | 
            +
                        x = pd.DataFrame(np.asarray(x))
         | 
| 51 | 
            +
             | 
| 52 | 
            +
                    self.in_feature_names = list(x.columns)
         | 
| 53 | 
            +
                    self._n_in_features = x.shape[1]
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                    # Remove always null columns
         | 
| 56 | 
            +
                    is_column_null = x.isnull().all(axis="index")
         | 
| 57 | 
            +
                    self.non_null_feature_names = list(x.columns[~is_column_null])
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                    # Build the proper imputation transformer
         | 
| 60 | 
            +
                    # - Identity function if strategy is None
         | 
| 61 | 
            +
                    # - SimpleImputer with the dedicated strategy
         | 
| 62 | 
            +
                    if self.imputation_strategy is None:
         | 
| 63 | 
            +
                        self.imputer_ = FunctionTransformer(func=self._identity)
         | 
| 64 | 
            +
                    else:
         | 
| 65 | 
            +
                        self.imputer_ = SimpleImputer(
         | 
| 66 | 
            +
                            missing_values=self.missing_values,
         | 
| 67 | 
            +
                            strategy=self.imputation_strategy,
         | 
| 68 | 
            +
                            fill_value=self.fill_value,
         | 
| 69 | 
            +
                        ).set_output(transform="pandas")
         | 
| 70 | 
            +
             | 
| 71 | 
            +
                    # Imputers do not support labels
         | 
| 72 | 
            +
                    self.imputer_.fit(X=x, y=None)
         | 
| 73 | 
            +
             | 
| 74 | 
            +
                def transform(self, x) -> pd.DataFrame:
         | 
| 75 | 
            +
                    """Transform the input data by imputing missing values."""
         | 
| 76 | 
            +
                    _ = check_array(x, force_all_finite="allow-nan")
         | 
| 77 | 
            +
                    if not isinstance(x, pd.DataFrame):
         | 
| 78 | 
            +
                        x = pd.DataFrame(np.asarray(x))
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                    x = x[self.non_null_feature_names]
         | 
| 81 | 
            +
             | 
| 82 | 
            +
                    return self.imputer_.transform(x)
         | 
| 83 | 
            +
             | 
| 84 | 
            +
                def fit_transform(self, x, y=None):
         | 
| 85 | 
            +
                    """Fit the imputer on the input data and transform it.
         | 
| 86 | 
            +
             | 
| 87 | 
            +
                    Returns:
         | 
| 88 | 
            +
                        The data with missing values imputed.
         | 
| 89 | 
            +
             | 
| 90 | 
            +
                    """
         | 
| 91 | 
            +
                    self.fit(x, y)
         | 
| 92 | 
            +
                    return self.transform(x)
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                @classmethod
         | 
| 95 | 
            +
                def _identity(cls, x):
         | 
| 96 | 
            +
                    return x
         | 
| 97 | 
            +
             | 
| 98 | 
            +
                def __sklearn_is_fitted__(self) -> bool:
         | 
| 99 | 
            +
                    return self.in_feature_names is not None
         | 
| @@ -3,6 +3,7 @@ | |
| 3 3 | 
             
            # SPDX-License-Identifier: MPL-2.0
         | 
| 4 4 |  | 
| 5 5 | 
             
            """This module contains all weather-related functions used for feature engineering."""
         | 
| 6 | 
            +
            import logging
         | 
| 6 7 | 
             
            from typing import Union
         | 
| 7 8 |  | 
| 8 9 | 
             
            import numpy as np
         | 
| @@ -12,7 +13,13 @@ import structlog | |
| 12 13 | 
             
            from pvlib.location import Location
         | 
| 13 14 |  | 
| 14 15 | 
             
            from openstef.data_classes.prediction_job import PredictionJobDataClass
         | 
| 16 | 
            +
            from openstef.settings import Settings
         | 
| 15 17 |  | 
| 18 | 
            +
            structlog.configure(
         | 
| 19 | 
            +
                wrapper_class=structlog.make_filtering_bound_logger(
         | 
| 20 | 
            +
                    logging.getLevelName(Settings.log_level)
         | 
| 21 | 
            +
                )
         | 
| 22 | 
            +
            )
         | 
| 16 23 | 
             
            logger = structlog.get_logger(__name__)
         | 
| 17 24 |  | 
| 18 25 |  | 
    
        openstef/metrics/figure.py
    CHANGED
    
    
    
        openstef/metrics/metrics.py
    CHANGED
    
    | @@ -25,6 +25,9 @@ def get_eval_metric_function(metric_name: str) -> Callable: | |
| 25 25 | 
             
                Returns:
         | 
| 26 26 | 
             
                    Function to calculate the metric.
         | 
| 27 27 |  | 
| 28 | 
            +
                Raises:
         | 
| 29 | 
            +
                    KeyError: If the metric is not available.
         | 
| 30 | 
            +
             | 
| 28 31 | 
             
                """
         | 
| 29 32 | 
             
                evaluation_function = {
         | 
| 30 33 | 
             
                    "rmse": rmse,
         | 
| @@ -130,6 +133,9 @@ def r_mae_highest( | |
| 130 133 |  | 
| 131 134 | 
             
                The range is based on the load range of the previous two weeks.
         | 
| 132 135 |  | 
| 136 | 
            +
                Raises:
         | 
| 137 | 
            +
                    ValueError: If the length of the realised and forecast arrays are not equal.
         | 
| 138 | 
            +
             | 
| 133 139 | 
             
                """
         | 
| 134 140 | 
             
                # Check if length of both arrays is equal
         | 
| 135 141 | 
             
                if len(np.array(realised)) != len(np.array(forecast)):
         | 
| @@ -395,7 +401,7 @@ def xgb_quantile_obj( | |
| 395 401 | 
             
                Args:
         | 
| 396 402 | 
             
                    preds: numpy.ndarray
         | 
| 397 403 | 
             
                    dmatrix: xgboost.DMatrix
         | 
| 398 | 
            -
                    quantile: float
         | 
| 404 | 
            +
                    quantile: float between 0 and 1
         | 
| 399 405 |  | 
| 400 406 | 
             
                Returns:
         | 
| 401 407 | 
             
                    Gradient and Hessian
         | 
| @@ -425,3 +431,54 @@ def xgb_quantile_obj( | |
| 425 431 | 
             
                hess = np.ones_like(preds)
         | 
| 426 432 |  | 
| 427 433 | 
             
                return grad, hess
         | 
| 434 | 
            +
             | 
| 435 | 
            +
             | 
| 436 | 
            +
            def arctan_loss(y_true, y_pred, taus, s=0.1):
         | 
| 437 | 
            +
                """Compute the arctan pinball loss.
         | 
| 438 | 
            +
             | 
| 439 | 
            +
                Note that XGBoost outputs the predictions in a slightly peculiar manner.
         | 
| 440 | 
            +
                Suppose we have 100 data points and we predict 10 quantiles. The predictions
         | 
| 441 | 
            +
                will be an array of size (1000 x 1). We first resize this to a (100x10) array
         | 
| 442 | 
            +
                where each row corresponds to the 10 predicted quantiles for a single data
         | 
| 443 | 
            +
                point. We then use a for-loop (over the 10 columns) to calculate the gradients
         | 
| 444 | 
            +
                and second derivatives. Legibility was chosen over efficiency. This part
         | 
| 445 | 
            +
                can be made more efficient.
         | 
| 446 | 
            +
             | 
| 447 | 
            +
                Args:
         | 
| 448 | 
            +
                    y_true: An array containing the true observations.
         | 
| 449 | 
            +
                    y_pred: An array containing the predicted quantiles.
         | 
| 450 | 
            +
                    taus: A list containing the true desired coverage of the quantiles.
         | 
| 451 | 
            +
                    s: A smoothing parameter.
         | 
| 452 | 
            +
             | 
| 453 | 
            +
                Returns:
         | 
| 454 | 
            +
                    grad: An array containing the (negative) gradients with respect to y_pred.
         | 
| 455 | 
            +
                    hess: An array containing the second derivative with respect to y_pred.
         | 
| 456 | 
            +
             | 
| 457 | 
            +
                """
         | 
| 458 | 
            +
                size = len(y_true)
         | 
| 459 | 
            +
                n_dim = len(taus)  # The number of columns
         | 
| 460 | 
            +
                n_rows = size // n_dim
         | 
| 461 | 
            +
             | 
| 462 | 
            +
                # Resize the predictions and targets.
         | 
| 463 | 
            +
                # Each column corresponds to a quantile, each row to a data point.
         | 
| 464 | 
            +
                y_pred = np.reshape(y_pred, (n_rows, n_dim))
         | 
| 465 | 
            +
                y_true = np.reshape(y_true, (n_rows, n_dim))
         | 
| 466 | 
            +
             | 
| 467 | 
            +
                # Calculate the differences
         | 
| 468 | 
            +
                u = y_true - y_pred
         | 
| 469 | 
            +
             | 
| 470 | 
            +
                # Calculate the gradient and second derivatives
         | 
| 471 | 
            +
                grad = np.zeros_like(y_pred)
         | 
| 472 | 
            +
                hess = np.zeros_like(y_pred)
         | 
| 473 | 
            +
                z = u / s
         | 
| 474 | 
            +
                for i, tau in enumerate(taus):
         | 
| 475 | 
            +
                    x = 1 + z[:, i] ** 2
         | 
| 476 | 
            +
                    grad[:, i] = (
         | 
| 477 | 
            +
                        tau - 0.5 + 1 / np.pi * np.arctan(z[:, i]) + z[:, i] / (np.pi) * x**-1
         | 
| 478 | 
            +
                    )
         | 
| 479 | 
            +
                    hess[:, i] = 2 / (np.pi * s) * x ** (-2)
         | 
| 480 | 
            +
             | 
| 481 | 
            +
                # Reshape back to the original shape.
         | 
| 482 | 
            +
                grad = grad.reshape(size)
         | 
| 483 | 
            +
                hess = hess.reshape(size)
         | 
| 484 | 
            +
                return -grad / n_dim, hess / n_dim
         | 
    
        openstef/metrics/reporter.py
    CHANGED
    
    | @@ -2,6 +2,7 @@ | |
| 2 2 | 
             
            #
         | 
| 3 3 | 
             
            # SPDX-License-Identifier: MPL-2.0
         | 
| 4 4 | 
             
            """Defines reporter class."""
         | 
| 5 | 
            +
            import logging
         | 
| 5 6 | 
             
            import os
         | 
| 6 7 | 
             
            import warnings
         | 
| 7 8 | 
             
            from dataclasses import dataclass
         | 
| @@ -16,6 +17,7 @@ from plotly.graph_objects import Figure | |
| 16 17 | 
             
            from openstef.metrics import figure
         | 
| 17 18 | 
             
            from openstef.metrics.metrics import bias, mae, nsme, r_mae, rmse
         | 
| 18 19 | 
             
            from openstef.model.regressors.regressor import OpenstfRegressor
         | 
| 20 | 
            +
            from openstef.settings import Settings
         | 
| 19 21 |  | 
| 20 22 |  | 
| 21 23 | 
             
            @dataclass
         | 
| @@ -167,6 +169,11 @@ class Reporter: | |
| 167 169 | 
             
                def write_report_to_disk(report: Report, report_folder: str):
         | 
| 168 170 | 
             
                    """Write report to disk; e.g. for viewing report of latest models using grafana."""
         | 
| 169 171 | 
             
                    # Initialize logger and serializer
         | 
| 172 | 
            +
                    structlog.configure(
         | 
| 173 | 
            +
                        wrapper_class=structlog.make_filtering_bound_logger(
         | 
| 174 | 
            +
                            logging.getLevelName(Settings.log_level)
         | 
| 175 | 
            +
                        )
         | 
| 176 | 
            +
                    )
         | 
| 170 177 | 
             
                    logger = structlog.get_logger(__name__)
         | 
| 171 178 | 
             
                    if report_folder:
         | 
| 172 179 | 
             
                        # create path if does not exist
         | 
| @@ -1,6 +1,7 @@ | |
| 1 1 | 
             
            # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
         | 
| 2 2 | 
             
            #
         | 
| 3 3 | 
             
            # SPDX-License-Identifier: MPL-2.0
         | 
| 4 | 
            +
            import logging
         | 
| 4 5 | 
             
            from datetime import datetime
         | 
| 5 6 |  | 
| 6 7 | 
             
            import numpy as np
         | 
| @@ -11,12 +12,18 @@ from sklearn.base import RegressorMixin | |
| 11 12 |  | 
| 12 13 | 
             
            from openstef.data_classes.prediction_job import PredictionJobDataClass
         | 
| 13 14 | 
             
            from openstef.exceptions import ModelWithoutStDev
         | 
| 15 | 
            +
            from openstef.settings import Settings
         | 
| 14 16 |  | 
| 15 17 |  | 
| 16 18 | 
             
            class ConfidenceIntervalApplicator:
         | 
| 17 19 | 
             
                def __init__(self, model: RegressorMixin, forecast_input_data: pd.DataFrame):
                    """Store the model and its input data and create a level-filtered logger.

                    Args:
                        model: Regressor the confidence interval is derived from.
                        forecast_input_data: Input data that belongs to the forecast.

                    """
                    self.model = model
                    self.forecast_input_data = forecast_input_data

                    # Build the logger wrapper from the log level in the application settings.
                    # NOTE(review): structlog.configure appears to act on the global structlog
                    # configuration, so this is re-applied on every instantiation - confirm
                    # that this is intended.
                    log_level = logging.getLevelName(Settings.log_level)
                    filtering_logger = structlog.make_filtering_bound_logger(log_level)
                    structlog.configure(wrapper_class=filtering_logger)

                    self.logger = structlog.get_logger(self.__class__.__name__)
         | 
| 21 28 |  | 
| 22 29 | 
             
                def add_confidence_interval(
         | 
| @@ -54,9 +61,24 @@ class ConfidenceIntervalApplicator: | |
| 54 61 | 
             
                    temp_forecast = self._add_standard_deviation_to_forecast(forecast)
         | 
| 55 62 |  | 
| 56 63 | 
             
                    if self.model.can_predict_quantiles:
         | 
| 57 | 
            -
                         | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 64 | 
            +
                        # Try to generate the quantiles that were requested
         | 
| 65 | 
            +
                        try:
         | 
| 66 | 
            +
                            result = self._add_quantiles_to_forecast_quantile_regression(
         | 
| 67 | 
            +
                                temp_forecast, pj["quantiles"]
         | 
| 68 | 
            +
                            )
         | 
| 69 | 
            +
                            return result
         | 
| 70 | 
            +
                        except Exception:
         | 
| 71 | 
            +
                            # Fallback on quantiles of the model if the requested quantiles cant be generated by the model.
         | 
| 72 | 
            +
                            # Can happen when the model was trained on different quantiles than are requested
         | 
| 73 | 
            +
                            result = self._add_quantiles_to_forecast_quantile_regression(
         | 
| 74 | 
            +
                                temp_forecast, self.model.quantiles
         | 
| 75 | 
            +
                            )
         | 
| 76 | 
            +
                            self.logger.warning(
         | 
| 77 | 
            +
                                "Quantiles are requested the model was not trained on. Using the quantiles the model was trained on",
         | 
| 78 | 
            +
                                requested_quantiles=pj["quantiles"],
         | 
| 79 | 
            +
                                trained_quantiles=self.model.quantiles,
         | 
| 80 | 
            +
                            )
         | 
| 81 | 
            +
                            return result
         | 
| 60 82 |  | 
| 61 83 | 
             
                    return self._add_quantiles_to_forecast_default(temp_forecast, pj["quantiles"])
         | 
| 62 84 |  | 
| @@ -74,6 +96,9 @@ class ConfidenceIntervalApplicator: | |
| 74 96 | 
             
                        Forecast with added standard deviation. DataFrame with columns:
         | 
| 75 97 | 
             
                            "forecast", "stdev"
         | 
| 76 98 |  | 
| 99 | 
            +
                    Raises:
         | 
| 100 | 
            +
                        ModelWithoutStDev: If the model does not have a valid standard deviation.
         | 
| 101 | 
            +
             | 
| 77 102 | 
             
                    """
         | 
| 78 103 | 
             
                    minimal_resolution: int = 15  # Minimal time resolution in minutes
         | 
| 79 104 | 
             
                    standard_deviation = self.model.standard_deviation
         | 
    
        openstef/model/model_creator.py
    CHANGED
    
    | @@ -1,25 +1,32 @@ | |
| 1 1 | 
             
            # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
         | 
| 2 2 | 
             
            #
         | 
| 3 3 | 
             
            # SPDX-License-Identifier: MPL-2.0
         | 
| 4 | 
            +
            import logging
         | 
| 4 5 | 
             
            from typing import Union
         | 
| 5 6 |  | 
| 6 7 | 
             
            import structlog
         | 
| 7 8 |  | 
| 8 9 | 
             
            from openstef.enums import MLModelType
         | 
| 10 | 
            +
            from openstef.model.regressors.arima import ARIMAOpenstfRegressor
         | 
| 9 11 | 
             
            from openstef.model.regressors.custom_regressor import is_custom_type, load_custom_model
         | 
| 10 12 | 
             
            from openstef.model.regressors.lgbm import LGBMOpenstfRegressor
         | 
| 11 13 | 
             
            from openstef.model.regressors.linear import LinearOpenstfRegressor
         | 
| 14 | 
            +
            from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor
         | 
| 12 15 | 
             
            from openstef.model.regressors.regressor import OpenstfRegressor
         | 
| 16 | 
            +
            from openstef.model.regressors.flatliner import FlatlinerRegressor
         | 
| 13 17 | 
             
            from openstef.model.regressors.xgb import XGBOpenstfRegressor
         | 
| 14 18 | 
             
            from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor
         | 
| 15 | 
            -
            from openstef.model.regressors. | 
| 19 | 
            +
            from openstef.model.regressors.xgb_multioutput_quantile import (
         | 
| 20 | 
            +
                XGBMultiOutputQuantileOpenstfRegressor,
         | 
| 21 | 
            +
            )
         | 
| 22 | 
            +
            from openstef.settings import Settings
         | 
| 16 23 |  | 
| 24 | 
            +
            structlog.configure(
         | 
| 25 | 
            +
                wrapper_class=structlog.make_filtering_bound_logger(
         | 
| 26 | 
            +
                    logging.getLevelName(Settings.log_level)
         | 
| 27 | 
            +
                )
         | 
| 28 | 
            +
            )
         | 
| 17 29 | 
             
            logger = structlog.get_logger(__name__)
         | 
| 18 | 
            -
            try:
         | 
| 19 | 
            -
                from openstef.model.regressors.proloaf import OpenstfProloafRegressor
         | 
| 20 | 
            -
            except ImportError:
         | 
| 21 | 
            -
                logger.info("Proloaf not available, setting constructor to None")
         | 
| 22 | 
            -
                OpenstfProloafRegressor = None
         | 
| 23 30 |  | 
| 24 31 | 
             
            valid_model_kwargs = {
         | 
| 25 32 | 
             
                MLModelType.XGB: [
         | 
| @@ -84,32 +91,32 @@ valid_model_kwargs = { | |
| 84 91 | 
             
                    "max_depth",
         | 
| 85 92 | 
             
                    "early_stopping_rounds",
         | 
| 86 93 | 
             
                ],
         | 
| 87 | 
            -
                MLModelType. | 
| 88 | 
            -
                    " | 
| 89 | 
            -
                    " | 
| 90 | 
            -
                    " | 
| 91 | 
            -
                    " | 
| 92 | 
            -
                    " | 
| 93 | 
            -
                    " | 
| 94 | 
            -
                    " | 
| 95 | 
            -
                    " | 
| 96 | 
            -
                    "training_metric",
         | 
| 97 | 
            -
                    "metric_options",
         | 
| 98 | 
            -
                    "optimizer_name",
         | 
| 99 | 
            -
                    "early_stopping_patience",
         | 
| 100 | 
            -
                    "early_stopping_margin",
         | 
| 101 | 
            -
                    "learning_rate",
         | 
| 102 | 
            -
                    "max_epochs",
         | 
| 103 | 
            -
                    "device",
         | 
| 104 | 
            -
                    "batch_size",
         | 
| 105 | 
            -
                    "history_horizon",
         | 
| 106 | 
            -
                    "horizon_minutes",
         | 
| 94 | 
            +
                MLModelType.XGB_MULTIOUTPUT_QUANTILE: [
         | 
| 95 | 
            +
                    "quantiles",
         | 
| 96 | 
            +
                    "gamma",
         | 
| 97 | 
            +
                    "colsample_bytree",
         | 
| 98 | 
            +
                    "subsample",
         | 
| 99 | 
            +
                    "min_child_weight",
         | 
| 100 | 
            +
                    "max_depth",
         | 
| 101 | 
            +
                    "early_stopping_rounds",
         | 
| 102 | 
            +
                    "arctan_smoothing",
         | 
| 107 103 | 
             
                ],
         | 
| 108 104 | 
             
                MLModelType.LINEAR: [
         | 
| 109 105 | 
             
                    "missing_values",
         | 
| 110 106 | 
             
                    "imputation_strategy",
         | 
| 111 107 | 
             
                    "fill_value",
         | 
| 112 108 | 
             
                ],
         | 
| 109 | 
            +
                MLModelType.FLATLINER: [
         | 
| 110 | 
            +
                    "quantiles",
         | 
| 111 | 
            +
                ],
         | 
| 112 | 
            +
                MLModelType.LINEAR_QUANTILE: [
         | 
| 113 | 
            +
                    "alpha",
         | 
| 114 | 
            +
                    "quantiles",
         | 
| 115 | 
            +
                    "solver",
         | 
| 116 | 
            +
                    "missing_values",
         | 
| 117 | 
            +
                    "imputation_strategy",
         | 
| 118 | 
            +
                    "fill_value",
         | 
| 119 | 
            +
                ],
         | 
| 113 120 | 
             
                MLModelType.ARIMA: [
         | 
| 114 121 | 
             
                    "backtest_max_horizon",
         | 
| 115 122 | 
             
                    "order",
         | 
| @@ -127,9 +134,11 @@ class ModelCreator: | |
| 127 134 | 
             
                    MLModelType.XGB: XGBOpenstfRegressor,
         | 
| 128 135 | 
             
                    MLModelType.LGB: LGBMOpenstfRegressor,
         | 
| 129 136 | 
             
                    MLModelType.XGB_QUANTILE: XGBQuantileOpenstfRegressor,
         | 
| 130 | 
            -
                    MLModelType. | 
| 137 | 
            +
                    MLModelType.XGB_MULTIOUTPUT_QUANTILE: XGBMultiOutputQuantileOpenstfRegressor,
         | 
| 131 138 | 
             
                    MLModelType.LINEAR: LinearOpenstfRegressor,
         | 
| 139 | 
            +
                    MLModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
         | 
| 132 140 | 
             
                    MLModelType.ARIMA: ARIMAOpenstfRegressor,
         | 
| 141 | 
            +
                    MLModelType.FLATLINER: FlatlinerRegressor,
         | 
| 133 142 | 
             
                }
         | 
| 134 143 |  | 
| 135 144 | 
             
                @staticmethod
         |