openstef 3.4.10__py3-none-any.whl → 3.4.44__py3-none-any.whl
This diff compares the publicly released contents of the two package versions as they appear in their respective public registries; it is provided for informational purposes only.
- openstef/app_settings.py +19 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
- openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +18 -0
- openstef/data/dutch_holidays.csv +1759 -0
- openstef/data_classes/data_prep.py +1 -1
- openstef/data_classes/prediction_job.py +15 -9
- openstef/enums.py +108 -9
- openstef/exceptions.py +1 -1
- openstef/feature_engineering/apply_features.py +25 -6
- openstef/feature_engineering/bidding_zone_to_country_mapping.py +106 -0
- openstef/feature_engineering/cyclic_features.py +102 -0
- openstef/feature_engineering/data_preparation.py +12 -5
- openstef/feature_engineering/feature_applicator.py +1 -5
- openstef/feature_engineering/general.py +14 -0
- openstef/feature_engineering/holiday_features.py +35 -26
- openstef/feature_engineering/missing_values_transformer.py +141 -0
- openstef/feature_engineering/weather_features.py +7 -0
- openstef/metrics/figure.py +3 -0
- openstef/metrics/metrics.py +58 -1
- openstef/metrics/reporter.py +7 -0
- openstef/model/confidence_interval_applicator.py +28 -3
- openstef/model/model_creator.py +54 -41
- openstef/model/objective.py +17 -34
- openstef/model/objective_creator.py +13 -12
- openstef/model/regressors/arima.py +1 -1
- openstef/model/regressors/dazls.py +35 -96
- openstef/model/regressors/flatliner.py +95 -0
- openstef/model/regressors/linear_quantile.py +296 -0
- openstef/model/regressors/xgb.py +23 -0
- openstef/model/regressors/xgb_multioutput_quantile.py +261 -0
- openstef/model/regressors/xgb_quantile.py +3 -0
- openstef/model/serializer.py +10 -0
- openstef/model_selection/model_selection.py +4 -1
- openstef/monitoring/performance_meter.py +1 -2
- openstef/monitoring/teams.py +11 -0
- openstef/pipeline/create_basecase_forecast.py +11 -1
- openstef/pipeline/create_component_forecast.py +24 -28
- openstef/pipeline/create_forecast.py +20 -1
- openstef/pipeline/optimize_hyperparameters.py +18 -16
- openstef/pipeline/train_create_forecast_backtest.py +11 -1
- openstef/pipeline/train_model.py +31 -12
- openstef/pipeline/utils.py +3 -0
- openstef/postprocessing/postprocessing.py +29 -0
- openstef/settings.py +15 -0
- openstef/tasks/calculate_kpi.py +23 -20
- openstef/tasks/create_basecase_forecast.py +15 -7
- openstef/tasks/create_components_forecast.py +24 -8
- openstef/tasks/create_forecast.py +9 -6
- openstef/tasks/create_solar_forecast.py +4 -4
- openstef/tasks/optimize_hyperparameters.py +2 -2
- openstef/tasks/split_forecast.py +9 -2
- openstef/tasks/train_model.py +9 -7
- openstef/tasks/utils/taskcontext.py +7 -0
- openstef/validation/validation.py +28 -3
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/METADATA +65 -57
- openstef-3.4.44.dist-info/RECORD +97 -0
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/WHEEL +1 -1
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z +0 -2
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_features.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_domain_model_scaler.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z +0 -0
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target.z.license +0 -3
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z +0 -6
- openstef/data/dazls_model_3.4.0/dazls_stored_3.4.0_target_scaler.z.license +0 -3
- openstef/data/dutch_holidays_2020-2022.csv +0 -831
- openstef/data/dutch_holidays_2020-2022.csv.license +0 -3
- openstef/feature_engineering/historic_features.py +0 -40
- openstef/model/regressors/proloaf.py +0 -281
- openstef/tasks/run_tracy.py +0 -145
- openstef-3.4.10.dist-info/RECORD +0 -104
- /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license} +0 -0
- /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_features.z.license → dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license} +0 -0
- /openstef/data/{dazls_model_3.4.0/dazls_stored_3.4.0_adaptation_model_scaler.z.license → dutch_holidays.csv.license} +0 -0
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/LICENSE +0 -0
- {openstef-3.4.10.dist-info → openstef-3.4.44.dist-info}/top_level.txt +0 -0
openstef/pipeline/create_basecase_forecast.py
CHANGED
@@ -1,13 +1,14 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
 #
 # SPDX-License-Identifier: MPL-2.0
+import logging
 from pathlib import Path

 import pandas as pd
 import structlog

 from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.exceptions import
+from openstef.exceptions import InputDataOngoingZeroFlatlinerError, NoRealisedLoadError
 from openstef.feature_engineering.feature_applicator import (
     OperationalPredictFeatureApplicator,
 )
@@ -18,6 +19,7 @@ from openstef.postprocessing.postprocessing import (
     add_components_base_case_forecast,
     add_prediction_job_properties_to_forecast,
 )
+from openstef.settings import Settings
 from openstef.validation import validation

 MODEL_LOCATION = Path(".")
@@ -38,7 +40,15 @@ def create_basecase_forecast_pipeline(
     Returns:
         Base case forecast

+    Raises:
+        NoRealisedLoadError: When no realised load for given datetime range.
+
     """
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(
+            logging.getLevelName(Settings.log_level)
+        )
+    )
     logger = structlog.get_logger(__name__)

     logger.info("Preprocessing data for basecase forecast")
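The same `structlog.configure(...)` block now appears at the top of each pipeline function, filtering log output to the level configured on the new `Settings` object. A minimal runnable sketch of what this pattern does, with the literal "INFO" standing in for `Settings.log_level`:

```python
import logging

import structlog

# Filter structlog output below the configured level; logging.getLevelName maps
# a level name to its numeric value (e.g. "INFO" -> 20).
structlog.configure(
    wrapper_class=structlog.make_filtering_bound_logger(logging.getLevelName("INFO"))
)

logger = structlog.get_logger(__name__)
logger.debug("dropped: below the configured level")
logger.info("emitted: at or above the configured level")
```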
openstef/pipeline/create_component_forecast.py
CHANGED
@@ -2,7 +2,10 @@
 #
 # SPDX-License-Identifier: MPL-2.0

+import logging
+
 import joblib
+import numpy as np
 import pandas as pd
 import structlog

@@ -11,12 +14,11 @@ from openstef import PROJECT_ROOT
 from openstef.data_classes.prediction_job import PredictionJobDataClass
 from openstef.enums import ForecastType
 from openstef.model.regressors.dazls import Dazls
-
-import numpy as np
+from openstef.settings import Settings

 # Set the path for the Dazls stored model
 DAZLS_STORED = str(
-    PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.
+    PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.24" / "dazls_stored_3.4.24_"
 )


@@ -96,47 +98,34 @@ def create_components_forecast_pipeline(
            "algtype"

     """
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(
+            logging.getLevelName(Settings.log_level)
+        )
+    )
     logger = structlog.get_logger(__name__)
     logger.info("Make components prediction", pid=pj["id"])

     # Make component forecasts
     try:
-
+        dazls_input_data = create_input(pj, input_data, weather_data)

         # Save and load the model as .sav file (or as .z file)
         # For the code contact: korte.termijn.prognoses@alliander.com
         dazls_model = Dazls()
-        dazls_model.
-        dazls_model.domain_model_scaler = joblib.load(
-            DAZLS_STORED + "domain_model_scaler.z"
-        )
-        dazls_model.domain_model_input_columns = joblib.load(
-            DAZLS_STORED + "domain_model_features.z"
-        )
+        dazls_model.model_ = joblib.load(DAZLS_STORED + "baseline_model.z")

-        dazls_model.adaptation_model = joblib.load(DAZLS_STORED + "adaptation_model.z")
-        dazls_model.adaptation_model_scaler = joblib.load(
-            DAZLS_STORED + "adaptation_model_scaler.z"
-        )
-        dazls_model.adaptation_model_input_columns = joblib.load(
-            DAZLS_STORED + "adaptation_model_features.z"
-        )
-
-        dazls_model.target_columns = joblib.load(DAZLS_STORED + "target.z")
-        dazls_model.target_scaler = joblib.load(DAZLS_STORED + "target_scaler.z")
-
-        logger = structlog.get_logger(__name__)
         logger.info("DAZLS model loaded", dazls_model=str(dazls_model))

         # Use the predict function of Dazls model
         # As input data we use the input_data function which takes into consideration what we want as an input for the forecast and what Dazls can accept as an input
-        forecasts = dazls_model.predict(x=
+        forecasts = dazls_model.predict(x=dazls_input_data)

         # Set the columns for the output forecast dataframe
         forecasts = pd.DataFrame(
             forecasts,
             columns=["forecast_wind_on_shore", "forecast_solar"],
-            index=
+            index=dazls_input_data.index,
         )

         # Make post-processed forecasts for solar and wind power
@@ -151,18 +140,25 @@ def create_components_forecast_pipeline(

         # Make forecast for the component: "forecast_other"
         forecasts["forecast_other"] = (
-
+            dazls_input_data["total_load"]
             - forecasts["forecast_solar"]
             - forecasts["forecast_wind_on_shore"]
         )
+
+        # Make sure the forecasts have the same form as the input data. Pad with 0 if necessary
+        forecasts = forecasts.reindex(index=input_data.index, fill_value=0)
     except Exception as e:
-        # In case something goes wrong we fall back on
+        # In case something goes wrong we fall back on an a zero-filled dataframe
         logger.warning(
             f"Could not make component forecasts: {e}, falling back on series of"
             " zeros!",
             exc_info=e,
         )
-        forecasts = pd.DataFrame(
+        forecasts = pd.DataFrame(
+            data=0,
+            index=input_data.index,
+            columns=["forecast_wind_on_shore", "forecast_solar", "forecast_other"],
+        )

     # Prepare for output
     # Add more prediction properties to the forecast ("pid","customer","description","type","algtype)
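The new `reindex(..., fill_value=0)` step pads the component forecast back onto the full input index, so the output always has the same shape as the input even when DAZLS produced fewer rows. A small illustration with made-up timestamps and values (not openstef code):

```python
import pandas as pd

idx = pd.date_range("2024-01-01", periods=4, freq="15min")

# Suppose the component model only produced forecasts for the first two timestamps.
forecasts = pd.DataFrame(
    {"forecast_wind_on_shore": [1.0, 2.0], "forecast_solar": [0.5, 0.6]},
    index=idx[:2],
)

# Pad the missing timestamps with zeros so the result matches the input index.
forecasts = forecasts.reindex(index=idx, fill_value=0)
print(forecasts)
```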
openstef/pipeline/create_forecast.py
CHANGED
@@ -1,6 +1,8 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
 #
 # SPDX-License-Identifier: MPL-2.0
+import logging
+
 import pandas as pd
 import structlog

@@ -16,7 +18,9 @@ from openstef.model.serializer import MLflowSerializer
 from openstef.pipeline.utils import generate_forecast_datetime_range
 from openstef.postprocessing.postprocessing import (
     add_prediction_job_properties_to_forecast,
+    sort_quantiles,
 )
+from openstef.settings import Settings
 from openstef.validation import validation


@@ -40,6 +44,10 @@ def create_forecast_pipeline(
     Returns:
         DataFrame with the forecast

+    Raises:
+        InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
+        LookupError: When no model is found for the given prediction job in MLflow.
+
     """
     prediction_model_pid = pj["id"]
     # Use the alternative forecast model if it's specify in the pj
@@ -64,7 +72,7 @@ def create_forecast_pipeline_core(
     Computes the forecasts and confidence intervals given a prediction job and input data.
     This pipeline has no database or persisitent storage dependencies.

-    Expected prediction job keys: "resolution_minutes", "
+    Expected prediction job keys: "resolution_minutes", "id", "type",
     "name", "quantiles"

     Args:
@@ -76,7 +84,15 @@ def create_forecast_pipeline_core(
     Returns:
         Forecast

+    Raises:
+        InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
+
     """
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(
+            logging.getLevelName(Settings.log_level)
+        )
+    )
     logger = structlog.get_logger(__name__)

     fallback_strategy = "extreme_day"  # this can later be expanded
@@ -142,6 +158,9 @@ def create_forecast_pipeline_core(
         model, forecast_input_data
     ).add_confidence_interval(forecast, pj)

+    # Sort quantiles - prevents crossing and is statistically sound
+    forecast = sort_quantiles(forecast)
+
     # Prepare for output
     forecast = add_prediction_job_properties_to_forecast(
         pj,
openstef/pipeline/optimize_hyperparameters.py
CHANGED
@@ -1,8 +1,9 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
 #
 # SPDX-License-Identifier: MPL-2.0
+import logging
 import os
-from typing import Any
+from typing import Any

 import optuna
 import pandas as pd
@@ -21,16 +22,22 @@ from openstef.model.objective import RegressorObjective
 from openstef.model.objective_creator import ObjectiveCreator
 from openstef.model.regressors.regressor import OpenstfRegressor
 from openstef.model.serializer import MLflowSerializer
+from openstef.model_selection.model_selection import split_data_train_validation_test
 from openstef.pipeline.train_model import (
     DEFAULT_TRAIN_HORIZONS_HOURS,
     train_model_pipeline_core,
 )
+from openstef.settings import Settings
 from openstef.validation import validation
-from openstef.model_selection.model_selection import split_data_train_validation_test

 optuna.logging.enable_propagation()  # Propagate logs to the root logger.
 optuna.logging.disable_default_handler()  # Stop showing logs in sys.stderr.

+structlog.configure(
+    wrapper_class=structlog.make_filtering_bound_logger(
+        logging.getLevelName(Settings.log_level)
+    )
+)
 logger = structlog.get_logger(__name__)

 # See https://optuna.readthedocs.io/en/stable/reference/generated/optuna.study.Study.html#optuna.study.Study.optimize
@@ -59,6 +66,9 @@ def optimize_hyperparameters_pipeline(

     Raises:
         ValueError: If the input_date is insufficient.
+        InputDataInsufficientError: If the input dataframe is empty.
+        InputDataWrongColumnOrderError: If the load column is missing in the input dataframe.
+        OldModelHigherScoreError: When old model is better than new model.

     Returns:
         Optimized hyperparameters.
@@ -119,6 +129,10 @@ def optimize_hyperparameters_pipeline_core(

     Raises:
         ValueError: If the input_date is insufficient.
+        InputDataInsufficientError: If the input dataframe is empty.
+        InputDataWrongColumnOrderError: If the load column is missing in the input dataframe.
+        OldModelHigherScoreError: When old model is better than new model.
+        InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.

     Returns:
         - Best model,
@@ -175,18 +189,6 @@ def optimize_hyperparameters_pipeline_core(
         horizons=horizons, feature_names=feature_names, feature_modules=feature_modules
     ).add_features(validated_data, pj=pj)

-    # Adds additional proloaf features to the input data, historic_load (equal to the load, first column)
-    if pj["model"] == "proloaf" and "historic_load" not in list(
-        validated_data_with_features.columns
-    ):
-        validated_data_with_features[
-            "historic_load"
-        ] = validated_data_with_features.iloc[:, 0]
-        # Make sure horizons is last column
-        temp_cols = validated_data_with_features.columns.tolist()
-        new_cols = temp_cols[:-2] + [temp_cols[-1]] + [temp_cols[-2]]
-        validated_data_with_features = validated_data_with_features[new_cols]
-
     # Create objective (NOTE: this is a callable class)
     objective = ObjectiveCreator.create_objective(model_type=pj["model"])

@@ -245,7 +247,7 @@ def optuna_optimization(
         - The objective object used by optuna

     """
-    model = ModelCreator.create_model(pj["model"])
+    model = ModelCreator.create_model(pj["model"], **(pj.model_kwargs or {}))
     # Apply set to default hyperparameters if they are specified in the pj
     if pj.default_modelspecs:
         valid_hyper_parameters = {
@@ -268,7 +270,7 @@ def optuna_optimization(
     if pj.train_split_func is None:
         split_func = split_data_train_validation_test
         split_args = {
-            "stratification_min_max":
+            "stratification_min_max": True,
             "back_test": True,
         }
     else:
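`ModelCreator.create_model(pj["model"], **(pj.model_kwargs or {}))` forwards any extra keyword arguments configured on the prediction job to the regressor constructor; the `or {}` keeps the unpacking valid when `model_kwargs` is `None`. A sketch of the idiom with a stand-in factory (the keyword names below are illustrative, not taken from this diff):

```python
def create_model(model_type: str, **kwargs):
    # Stand-in for ModelCreator.create_model; the real factory builds an OpenstfRegressor.
    return {"model_type": model_type, **kwargs}


model_kwargs = None  # e.g. pj.model_kwargs when nothing extra is configured
print(create_model("xgb", **(model_kwargs or {})))  # -> {'model_type': 'xgb'}

model_kwargs = {"n_estimators": 200}  # illustrative keyword only
print(create_model("xgb", **(model_kwargs or {})))  # extra kwargs are forwarded
```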
openstef/pipeline/train_create_forecast_backtest.py
CHANGED
@@ -56,10 +56,16 @@ def train_model_and_forecast_back_test(
         - Validation data sets (list[pd.DataFrame])
         - Test data sets (list[pd.DataFrame])

+    Raises:
+        InputDataInsufficientError: when input data is insufficient.
+        InputDataWrongColumnOrderError: when input data has a invalid column order.
+        ValueError: when the horizon is a string and the corresponding column in not in the input data
+        InputDataOngoingZeroFlatlinerError: when all recent load measurements are zero.
+
     """
     if pj.backtest_split_func is None:
         backtest_split_func = backtest_split_default
-        backtest_split_args = {"stratification_min_max":
+        backtest_split_args = {"stratification_min_max": True}
     else:
         backtest_split_func, backtest_split_args = pj.backtest_split_func.load(
             required_arguments=["data", "n_folds"]
@@ -124,6 +130,10 @@ def train_model_and_forecast_test_core(
         - The trained model
         - The forecast on the test set.

+    Raises:
+        NotImplementedError: When using invalid model type in the prediction job.
+        InputDataWrongColumnOrderError: When 'load' column is not first and 'horizon' column is not last.
+
     """
     model = train_model.train_pipeline_step_train_model(
         pj, modelspecs, train_data, validation_data
openstef/pipeline/train_model.py
CHANGED
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: MPL-2.0
 import logging
 import os
-from typing import Optional, Union
+from typing import Optional, Union, Tuple

 import pandas as pd
 import structlog
@@ -23,6 +23,7 @@ from openstef.model.regressors.regressor import OpenstfRegressor
 from openstef.model.serializer import MLflowSerializer
 from openstef.model.standard_deviation_generator import StandardDeviationGenerator
 from openstef.model_selection.model_selection import split_data_train_validation_test
+from openstef.settings import Settings
 from openstef.validation import validation

 DEFAULT_TRAIN_HORIZONS_HOURS: list[float] = [0.25, 47.0]
@@ -31,6 +32,11 @@ MAXIMUM_MODEL_AGE: int = 7
 DEFAULT_EARLY_STOPPING_ROUNDS: int = 10
 PENALTY_FACTOR_OLD_MODEL: float = 1.2

+structlog.configure(
+    wrapper_class=structlog.make_filtering_bound_logger(
+        logging.getLevelName(Settings.log_level)
+    )
+)
 logger = structlog.get_logger(__name__)


@@ -60,6 +66,13 @@ def train_model_pipeline(
         - The validation dataset with forecasts
         - The test dataset with forecasts

+    Raises:
+        InputDataInsufficientError: when input data is insufficient.
+        InputDataWrongColumnOrderError: when input data has a invalid column order.
+            'load' column should be first and 'horizon' column last.
+        OldModelHigherScoreError: When old model is better than new model.
+        SkipSaveTrainingForecasts: If old model is better or younger than `MAXIMUM_MODEL_AGE`, the model is not saved.
+
     """
     # Initialize serializer
     serializer = MLflowSerializer(mlflow_tracking_uri=mlflow_tracking_uri)
@@ -142,7 +155,7 @@ def train_model_pipeline_core(
     input_data: pd.DataFrame,
     old_model: OpenstfRegressor = None,
     horizons: list[float] = DEFAULT_TRAIN_HORIZONS_HOURS,
-) ->
+) -> Tuple[
     OpenstfRegressor,
     Report,
     ModelSpecificationDataClass,
@@ -164,6 +177,7 @@ def train_model_pipeline_core(
         InputDataInsufficientError: when input data is insufficient.
         InputDataWrongColumnOrderError: when input data has a invalid column order.
         OldModelHigherScoreError: When old model is better than new model.
+        InputDataOngoingZeroFlatlinerError: when all recent load measurements are zero.

     Returns:
         - Fitted_model (OpenstfRegressor)
@@ -172,8 +186,6 @@ def train_model_pipeline_core(
         - Datasets (tuple[pd.DataFrmae, pd.DataFrame, pd.Dataframe): The train, validation and test sets

     """
-    logger = structlog.get_logger(__name__)
-
     # Call common pipeline
     (
         model,
@@ -234,7 +246,9 @@ def train_pipeline_common(
     test_fraction: float = 0.0,
     backtest: bool = False,
     test_data_predefined: pd.DataFrame = pd.DataFrame(),
-) -> tuple[
+) -> tuple[
+    OpenstfRegressor, Report, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame
+]:
     """Common pipeline shared with operational training and backtest training.

     Args:
@@ -257,6 +271,8 @@ def train_pipeline_common(
     Raises:
         InputDataInsufficientError: when input data is insufficient.
         InputDataWrongColumnOrderError: when input data has a invalid column order.
+            'load' column should be first and 'horizon' column last.
+        InputDataOngoingZeroFlatlinerError: when all recent load measurements are zero.

     """
     data_with_features = train_pipeline_step_compute_features(
@@ -300,7 +316,8 @@ def train_pipeline_common(

 def train_pipeline_step_load_model(
     pj: PredictionJobDataClass, serializer: MLflowSerializer
-) ->
+) -> Tuple[OpenstfRegressor, ModelSpecificationDataClass, Union[int, float]]:
+    old_model: Optional[OpenstfRegressor]
     try:
         old_model, model_specs = serializer.load_model(experiment_name=str(pj.id))
         old_model_age = old_model.age  # Age attribute is openstef specific
@@ -346,12 +363,9 @@ def train_pipeline_step_compute_features(
         InputDataInsufficientError: when input data is insufficient.
         InputDataWrongColumnOrderError: when input data has a invalid column order.
         ValueError: when the horizon is a string and the corresponding column in not in the input data
+        InputDataOngoingZeroFlatlinerError: when all recent load measurements are zero.

     """
-    if pj["model"] == "proloaf":
-        # proloaf is only able to train with one horizon
-        horizons = [horizons[0]]
-
     if input_data.empty:
         raise InputDataInsufficientError("Input dataframe is empty")
     elif "load" not in input_data.columns:
@@ -423,6 +437,10 @@ def train_pipeline_step_train_model(
     Returns:
         The trained model

+    Raises:
+        NotImplementedError: When using invalid model type in the prediction job.
+        InputDataWrongColumnOrderError: When 'load' column is not first and 'horizon' column is not last.
+
     """
     # Test if first column is "load" and last column is "horizon"
     if train_data.columns[0] != "load" or train_data.columns[-1] != "horizon":
@@ -435,6 +453,7 @@ def train_pipeline_step_train_model(
     model = ModelCreator.create_model(
         pj["model"],
         quantiles=pj["quantiles"],
+        **(pj.model_kwargs or {}),
     )

     # split x and y data
@@ -493,7 +512,7 @@ def train_pipeline_step_split_data(
     test_fraction: float,
     backtest: bool = False,
     test_data_predefined: pd.DataFrame = pd.DataFrame(),
-) ->
+) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
     """The default way to perform train, val, test split.

     Args:
@@ -523,7 +542,7 @@ def train_pipeline_step_split_data(
     if pj.train_split_func is None:
         split_func = split_data_train_validation_test
         split_args = {
-            "stratification_min_max":
+            "stratification_min_max": True,
             "back_test": backtest,
         }
     else:
openstef/pipeline/utils.py
CHANGED
@@ -27,6 +27,9 @@ def generate_forecast_datetime_range(
     Returns:
         Start and end datetimes of the forecast range.

+    Raises:
+        ValueError: If the target column does not have null values.
+
     """
     # By labeling the True/False values (based on the isnull() statement) as clusters,
     # we find what True value belongs to what cluster and the amount of True clusters.
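The new `Raises` note documents that `generate_forecast_datetime_range` derives the forecast window from the null values in the target column, so input without any nulls has nothing to forecast. A simplified sketch of that contract (assuming nulls only occur at the end of the series; this is not the library implementation):

```python
import numpy as np
import pandas as pd


def forecast_range_sketch(df: pd.DataFrame, target: str = "load"):
    # Simplified: the forecast range is the trailing block of null target values.
    nulls = df[target].isnull()
    if not nulls.any():
        raise ValueError("Target column has no null values, nothing to forecast.")
    return df.index[nulls][0], df.index[-1]


idx = pd.date_range("2024-01-01", periods=6, freq="15min")
df = pd.DataFrame({"load": [1.0, 1.2, 1.1, np.nan, np.nan, np.nan]}, index=idx)
print(forecast_range_sketch(df))  # (timestamp of first null, last timestamp)
```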
openstef/postprocessing/postprocessing.py
CHANGED
@@ -1,6 +1,7 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
 #
 # SPDX-License-Identifier: MPL-2.0
+import logging
 from enum import Enum

 import numpy as np
@@ -10,6 +11,7 @@ import structlog
 from openstef.data_classes.prediction_job import PredictionJobDataClass
 from openstef.enums import ForecastType
 from openstef.feature_engineering import weather_features
+from openstef.settings import Settings

 # this is the default for "Lagerwey100"
 TURBINE_DATA = {
@@ -219,6 +221,11 @@ def add_prediction_job_properties_to_forecast(
         Dataframe with added metadata.

     """
+    structlog.configure(
+        wrapper_class=structlog.make_filtering_bound_logger(
+            logging.getLevelName(Settings.log_level)
+        )
+    )
     logger = structlog.get_logger(__name__)

     logger.info("Postproces in preparation of storing")
@@ -244,3 +251,25 @@ def add_prediction_job_properties_to_forecast(
     forecast["algtype"] = algorithm_type

     return forecast
+
+
+def sort_quantiles(
+    forecast: pd.DataFrame, quantile_col_start="quantile_P"
+) -> pd.DataFrame:
+    """Sort quantile values so quantiles do not cross.
+
+    This function assumes that all quantile columns start with 'quantile_P' For more academic details on why this is
+    mathematically sounds, please refer to Quantile and Probability Curves Without Crossing (Chernozhukov, 2010)
+
+    """
+    p_columns = [col for col in forecast.columns if col.startswith(quantile_col_start)]
+
+    if len(p_columns) == 0:
+        return forecast
+
+    # sort the columns
+    p_columns = np.sort(p_columns)
+
+    forecast.loc[:, p_columns] = forecast[p_columns].apply(sorted, axis=1).to_list()
+
+    return forecast
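A short usage example of the new `sort_quantiles` helper; the values are made up, and the point is that crossing quantile columns are re-sorted row by row:

```python
import pandas as pd

from openstef.postprocessing.postprocessing import sort_quantiles

# One row where the quantiles cross: P10 > P50.
forecast = pd.DataFrame(
    {"quantile_P10": [12.0], "quantile_P50": [10.0], "quantile_P90": [15.0]}
)

sorted_forecast = sort_quantiles(forecast)
print(sorted_forecast)  # P10=10.0, P50=12.0, P90=15.0 -> monotone again
```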
openstef/settings.py
ADDED
@@ -0,0 +1,15 @@
+# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501
+#
+# SPDX-License-Identifier: MPL-2.0
+
+from functools import lru_cache
+
+from openstef.app_settings import AppSettings
+
+
+@lru_cache
+def _get_app_settings() -> AppSettings:
+    return AppSettings()
+
+
+Settings = _get_app_settings()
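Because `_get_app_settings` is wrapped in `lru_cache`, repeated calls return the same `AppSettings` instance, and the module-level `Settings` is that shared object. A sketch of the singleton behaviour with a stand-in class (`AppSettings` itself lives in the newly added `openstef/app_settings.py`, whose contents are not shown in this diff; only the `log_level` field is relied on above):

```python
from functools import lru_cache


class FakeAppSettings:
    """Stand-in for openstef.app_settings.AppSettings."""

    def __init__(self):
        self.log_level = "INFO"  # assumed field; the pipelines read Settings.log_level


@lru_cache
def _get_app_settings() -> FakeAppSettings:
    return FakeAppSettings()


Settings = _get_app_settings()
assert Settings is _get_app_settings()  # cached: one shared instance
print(Settings.log_level)
```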