PyPI - openstef - Versions diffs - 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl - Mend

openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104) hide show

openstef-4.0.0a3.dist-info/METADATA +177 -0
openstef-4.0.0a3.dist-info/RECORD +4 -0
{openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
openstef/__init__.py +0 -14
openstef/__main__.py +0 -3
openstef/app_settings.py +0 -19
openstef/data/NL_terrestrial_radiation.csv +0 -25585
openstef/data/NL_terrestrial_radiation.csv.license +0 -3
openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
openstef/data/dutch_holidays.csv +0 -1759
openstef/data/dutch_holidays.csv.license +0 -3
openstef/data/pv_single_coefs.csv +0 -601
openstef/data/pv_single_coefs.csv.license +0 -3
openstef/data_classes/__init__.py +0 -3
openstef/data_classes/data_prep.py +0 -99
openstef/data_classes/model_specifications.py +0 -30
openstef/data_classes/prediction_job.py +0 -135
openstef/data_classes/split_function.py +0 -97
openstef/enums.py +0 -140
openstef/exceptions.py +0 -74
openstef/feature_engineering/__init__.py +0 -3
openstef/feature_engineering/apply_features.py +0 -138
openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
openstef/feature_engineering/cyclic_features.py +0 -161
openstef/feature_engineering/data_preparation.py +0 -152
openstef/feature_engineering/feature_adder.py +0 -206
openstef/feature_engineering/feature_applicator.py +0 -202
openstef/feature_engineering/general.py +0 -141
openstef/feature_engineering/holiday_features.py +0 -231
openstef/feature_engineering/lag_features.py +0 -165
openstef/feature_engineering/missing_values_transformer.py +0 -141
openstef/feature_engineering/rolling_features.py +0 -58
openstef/feature_engineering/weather_features.py +0 -492
openstef/metrics/__init__.py +0 -3
openstef/metrics/figure.py +0 -303
openstef/metrics/metrics.py +0 -486
openstef/metrics/reporter.py +0 -222
openstef/model/__init__.py +0 -3
openstef/model/basecase.py +0 -82
openstef/model/confidence_interval_applicator.py +0 -242
openstef/model/fallback.py +0 -77
openstef/model/metamodels/__init__.py +0 -3
openstef/model/metamodels/feature_clipper.py +0 -90
openstef/model/metamodels/grouped_regressor.py +0 -222
openstef/model/metamodels/missing_values_handler.py +0 -138
openstef/model/model_creator.py +0 -214
openstef/model/objective.py +0 -426
openstef/model/objective_creator.py +0 -65
openstef/model/regressors/__init__.py +0 -3
openstef/model/regressors/arima.py +0 -197
openstef/model/regressors/custom_regressor.py +0 -64
openstef/model/regressors/dazls.py +0 -116
openstef/model/regressors/flatliner.py +0 -95
openstef/model/regressors/gblinear_quantile.py +0 -334
openstef/model/regressors/lgbm.py +0 -29
openstef/model/regressors/linear.py +0 -90
openstef/model/regressors/linear_quantile.py +0 -305
openstef/model/regressors/regressor.py +0 -114
openstef/model/regressors/xgb.py +0 -52
openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
openstef/model/regressors/xgb_quantile.py +0 -228
openstef/model/serializer.py +0 -431
openstef/model/standard_deviation_generator.py +0 -81
openstef/model_selection/__init__.py +0 -3
openstef/model_selection/model_selection.py +0 -311
openstef/monitoring/__init__.py +0 -3
openstef/monitoring/performance_meter.py +0 -92
openstef/monitoring/teams.py +0 -203
openstef/pipeline/__init__.py +0 -3
openstef/pipeline/create_basecase_forecast.py +0 -133
openstef/pipeline/create_component_forecast.py +0 -168
openstef/pipeline/create_forecast.py +0 -171
openstef/pipeline/optimize_hyperparameters.py +0 -317
openstef/pipeline/train_create_forecast_backtest.py +0 -163
openstef/pipeline/train_model.py +0 -561
openstef/pipeline/utils.py +0 -52
openstef/postprocessing/__init__.py +0 -3
openstef/postprocessing/postprocessing.py +0 -275
openstef/preprocessing/__init__.py +0 -3
openstef/preprocessing/preprocessing.py +0 -42
openstef/settings.py +0 -15
openstef/tasks/__init__.py +0 -3
openstef/tasks/calculate_kpi.py +0 -324
openstef/tasks/create_basecase_forecast.py +0 -118
openstef/tasks/create_components_forecast.py +0 -162
openstef/tasks/create_forecast.py +0 -145
openstef/tasks/create_solar_forecast.py +0 -420
openstef/tasks/create_wind_forecast.py +0 -80
openstef/tasks/optimize_hyperparameters.py +0 -135
openstef/tasks/split_forecast.py +0 -273
openstef/tasks/train_model.py +0 -224
openstef/tasks/utils/__init__.py +0 -3
openstef/tasks/utils/dependencies.py +0 -107
openstef/tasks/utils/predictionjobloop.py +0 -243
openstef/tasks/utils/taskcontext.py +0 -160
openstef/validation/__init__.py +0 -3
openstef/validation/validation.py +0 -322
openstef-3.4.56.dist-info/METADATA +0 -154
openstef-3.4.56.dist-info/RECORD +0 -102
openstef-3.4.56.dist-info/top_level.txt +0 -1
/openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0

openstef/tasks/create_basecase_forecast.py DELETED Viewed

@@ -1,118 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-"""This module should be executed once every day.
-For all prediction_jobs, it will create a 'basecase' forecast which is less accurate, but (almost) always available.
-For now, it uses the load a week earlier.
-Missing datapoints are interpolated.
-Example:
-    This module is meant to be called directly from a CRON job. A description of the
-    CRON job can be found in the /k8s/CronJobs folder.
-    Alternatively this code can be run directly by running:
-        $ python create_basecase_forecast.py
-"""
-from datetime import datetime, timedelta
-from pathlib import Path
-import pandas as pd
-from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import PipelineType
-from openstef.pipeline.create_basecase_forecast import create_basecase_forecast_pipeline
-from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
-from openstef.tasks.utils.taskcontext import TaskContext
-T_BEHIND_DAYS: int = 15
-T_AHEAD_DAYS: int = 14
-def create_basecase_forecast_task(
-    pj: PredictionJobDataClass,
-    context: TaskContext,
-    t_behind_days=T_BEHIND_DAYS,
-    t_ahead_days=T_AHEAD_DAYS,
-) -> None:
-    """Top level task that creates a basecase forecast.
-    On this task level all database and context manager dependencies are resolved.
-    Args:
-        pj: Prediction job
-        context: Context object that holds a config manager and a database connection
-        t_behind_days: number of days included as history. This is used to generate lagged features for the to-be-forecasted period
-        t_ahead_days: number of days a basecase forecast is created for
-    """
-    # Check pipeline types
-    if PipelineType.FORECAST not in pj.pipelines_to_run:
-        context.logger.info(
-            "Skip this PredictionJob because forecast pipeline is not specified in the pj."
-        )
-        return
-    # TODO: Improve implementation by using a field in the database and leveraging the
-    #       `pipelines_to_run` attribute of the `PredictionJobDataClass` object. This
-    #       would require a change to the MySQL datamodel.
-    if (
-        context.config.externally_posted_forecasts_pids
-        and pj.id in context.config.externally_posted_forecasts_pids
-    ):
-        context.logger.info(
-            "Skip this PredictionJob because its forecasts are posted by an external process."
-        )
-        return
-    # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
-    datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
-    # Retrieve input data
-    input_data = context.database.get_model_input(
-        pid=pj["id"],
-        location=[pj["lat"], pj["lon"]],
-        datetime_start=datetime_start,
-        datetime_end=datetime_end,
-    )
-    # Make basecase forecast using the corresponding pipeline
-    basecase_forecast = create_basecase_forecast_pipeline(pj, input_data)
-    # Do not store basecase forecasts for moments within the prediction job's horizon.
-    # Those should be updated by regular forecast process.
-    basecase_forecast = basecase_forecast.loc[
-        basecase_forecast.index
-        > (
-            pd.to_datetime(datetime.utcnow(), utc=True)
-            + timedelta(minutes=pj.horizon_minutes)
-        ),
-        :,
-    ]
-    # Write basecase forecast to the database
-    context.database.write_forecast(basecase_forecast, t_ahead_series=True)
-def main(config: object = None, database: object = None, **kwargs):
-    taskname = Path(__file__).name.replace(".py", "")
-    if database is None or config is None:
-        raise RuntimeError(
-            "Please specifiy a config object and/or database connection object. These"
-            " can be found in the openstef-dbc package."
-        )
-    with TaskContext(taskname, config, database) as context:
-        model_type = ["xgb", "xgb_quantile", "lgb"]
-        PredictionJobLoop(context, model_type=model_type).map(
-            create_basecase_forecast_task, context, **kwargs
-        )
-if __name__ == "__main__":
-    main()

openstef/tasks/create_components_forecast.py DELETED Viewed

@@ -1,162 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-"""This module contains the CRON job that is periodically executed to make the components prognoses.
-This code assumes trained models are available from the persistent storage.
-If these are not available run train_model.py to train all models.
-To provide the prognoses the following steps are carried out:
-  1. Get historic training data (TDCV, Load, Weather and day_ahead_electricity_price data)
-  2. Apply features
-  3. Load model
-  4. Make component prediction
-  5. Write prediction to the database
-  6. Send Teams message if something goes wrong
-Example:
-    This module is meant to be called directly from a CRON job. A description of
-    the CRON job can be found in the /k8s/CronJobs folder.
-    Alternatively this code can be run directly by running::
-        $ python create_components_forecast.py
-"""
-import logging
-from datetime import datetime, timedelta, timezone
-from pathlib import Path
-import pandas as pd
-import structlog
-from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import ModelType
-from openstef.exceptions import ComponentForecastTooShortHorizonError
-from openstef.pipeline.create_component_forecast import (
-    create_components_forecast_pipeline,
-)
-from openstef.settings import Settings
-from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
-from openstef.tasks.utils.taskcontext import TaskContext
-T_BEHIND_DAYS = 0
-T_AHEAD_DAYS = 3
-def create_components_forecast_task(
-    pj: PredictionJobDataClass,
-    context: TaskContext,
-    t_behind_days: int = T_BEHIND_DAYS,
-    t_ahead_days: int = T_AHEAD_DAYS,
-) -> None:
-    """Top level task that creates a components forecast.
-    On this task level all database and context manager dependencies are resolved.
-    Args:
-        pj: Prediction job
-        context: Context object that holds a config manager and a database connection
-        t_behind_days: number of days in the past that the component forecast is created for
-        t_ahead_days: number of days in the future that the component forecast is created for
-    Raises:
-        ComponentForecastTooShortHorizonError: If the forecast horizon is too short
-         (less than 30 hours in advance)
-    """
-    structlog.configure(
-        wrapper_class=structlog.make_filtering_bound_logger(
-            logging.getLevelName(Settings.log_level)
-        )
-    )
-    logger = structlog.get_logger(__name__)
-    if pj["train_components"] == 0:
-        context.logger.info(
-            "Skip prediction job", train_components=pj["train_components"]
-        )
-        return
-    # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
-    datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
-    logger.info(
-        "Get predicted load", datetime_start=datetime_start, datetime_end=datetime_end
-    )
-    # Get most recent load forecast as input_data,
-    # we use a regular forecast as input point for creating component forecasts
-    input_data = context.database.get_predicted_load(
-        pj, start_time=datetime_start, end_time=datetime_end
-    )
-    # Check if input_data is not empty
-    if len(input_data) == 0:
-        logger.warning(f"No forecast found. Skipping pid", pid=pj["id"])
-        return
-    logger.info("retrieving weather data")
-    # TODO make openstef_dbc function to retrieve inputdata for component forecast in one call,
-    #  this will make this function much shorter
-    # Get required weather data
-    weather_data = context.database.get_weather_data(
-        [pj["lat"], pj["lon"]],
-        [
-            "radiation",
-            "windspeed_100m",
-        ],  # These variables are used when determining the splitting coefficients, and should therefore be reused when making the component forecasts.
-        datetime_start=datetime_start,
-        datetime_end=datetime_end,
-    )
-    # Make forecast for the demand, wind and pv components
-    forecasts = create_components_forecast_pipeline(pj, input_data, weather_data)
-    ## Perform sanity check on index
-    if not isinstance(forecasts.index, pd.core.indexes.datetimes.DatetimeIndex):
-        raise ValueError(
-            f"Index is not datetime. Received forecasts:{forecasts.head()}"
-        )
-    # save forecast to database #######################################################
-    context.database.write_forecast(forecasts)
-    logger.debug("Written forecast to database")
-    # Check if forecast was complete enough, otherwise raise exception
-    if forecasts.index.max() < datetime.utcnow().replace(
-        tzinfo=timezone.utc
-    ) + timedelta(hours=30):
-        # Check which input data is missing the most.
-        # Do this by counting the NANs for (load)forecast, radiation and windspeed
-        max_index = forecasts.index.max()
-        n_nas = dict(
-            nans_load_forecast=input_data.loc[max_index:, "forecast"].isna().sum(),
-            nans_radiation=weather_data.loc[max_index:, "radiation"].isna().sum(),
-            nans_windspeed_100m=weather_data.loc[max_index:, "windspeed_100m"]
-            .isna()
-            .sum(),
-        )
-        max_na = max(n_nas, key=n_nas.get)
-        raise ComponentForecastTooShortHorizonError(
-            f"Could not make component forecast for two days ahead, probably input data is missing, {max_na}: {n_nas[max_na]}"
-        )
-def main(config: object = None, database: object = None, **kwargs):
-    taskname = Path(__file__).name.replace(".py", "")
-    if database is None or config is None:
-        raise RuntimeError(
-            "Please specifiy a config object and/or database connection object. These"
-            " can be found in the openstef-dbc package."
-        )
-    with TaskContext(taskname, config, database) as context:
-        model_type = [ml.value for ml in ModelType]
-        PredictionJobLoop(
-            context,
-            model_type=model_type,
-        ).map(create_components_forecast_task, context, **kwargs)
-if __name__ == "__main__":
-    main()

openstef/tasks/create_forecast.py DELETED Viewed

@@ -1,145 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-"""This module contains the CRON job that is periodically executed to make prognoses and save them in to the database.
-This code assumes trained models are available from the persistent storage. If these
-are not available run train_model.py to train all models.
-To provide the prognoses the following steps are carried out:
-  1. Get historic training data (TDCV, Load, Weather and day_ahead_electricity_price data)
-  2. Apply features
-  3. Load model
-  4. Make prediction
-  5. Write prediction to the database
-  6. Send Teams message if something goes wrong
-Example:
-    This module is meant to be called directly from a CRON job.
-    Alternatively this code can be run directly by running::
-        $ python create_forecast.py
-"""
-from datetime import datetime, timedelta
-from pathlib import Path
-from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import BiddingZone, ModelType, PipelineType
-from openstef.exceptions import InputDataOngoingZeroFlatlinerError
-from openstef.pipeline.create_forecast import create_forecast_pipeline
-from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
-from openstef.tasks.utils.taskcontext import TaskContext
-from openstef.validation.validation import detect_ongoing_zero_flatliner
-T_BEHIND_DAYS: int = 14
-def create_forecast_task(
-    pj: PredictionJobDataClass, context: TaskContext, t_behind_days: int = T_BEHIND_DAYS
-) -> None:
-    """Top level task that creates a forecast.
-    On this task level all database and context manager dependencies are resolved.
-    Expected prediction job keys; "id", "lat", "lon", "resolution_minutes",
-        "horizon_minutes", "type", "name", "quantiles"
-    Args:
-        pj: Prediction job
-        context: Context object that holds a config manager and a database connection
-        t_behind_days: number of days included as history. This is used to generate lagged features for the to-be-forecasted period
-    """
-    # Check pipeline types
-    if PipelineType.FORECAST not in pj.pipelines_to_run:
-        context.logger.info(
-            "Skip this PredictionJob because forecast pipeline is not specified in the pj."
-        )
-        return
-    # TODO: Improve implementation by using a field in the database and leveraging the
-    #       `pipelines_to_run` attribute of the `PredictionJobDataClass` object. This
-    #       would require a change to the MySQL datamodel.
-    if (
-        context.config.externally_posted_forecasts_pids
-        and pj.id in context.config.externally_posted_forecasts_pids
-    ):
-        context.logger.info(
-            "Skip this PredictionJob because its forecasts are posted by an external process."
-        )
-        return
-    # Extract mlflow tracking URI and trained models folder
-    mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri
-    # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
-    datetime_end = datetime.utcnow() + timedelta(seconds=pj.horizon_minutes * 60)
-    # Retrieve input data
-    input_data = context.database.get_model_input(
-        pid=pj["id"],
-        location=[pj["lat"], pj["lon"]],
-        datetime_start=datetime_start,
-        datetime_end=datetime_end,
-        market_price=pj.electricity_bidding_zone.value,
-    )
-    # Add APX price to the input data for backward compatibility; remove this line when all models are retrained
-    if pj.electricity_bidding_zone == BiddingZone.NL:
-        input_data["APX"] = input_data["day_ahead_electricity_price"]
-    try:
-        # Make forecast with the forecast pipeline
-        forecast = create_forecast_pipeline(
-            pj, input_data, mlflow_tracking_uri=mlflow_tracking_uri
-        )
-    except (InputDataOngoingZeroFlatlinerError, LookupError) as e:
-        if (
-            context.config.known_zero_flatliners
-            and pj.id in context.config.known_zero_flatliners
-        ):
-            context.logger.info(
-                "No forecasts were made for this known zero flatliner prediction job. No forecasts need to be made either, since the fallback forecasts are sufficient."
-            )
-            return
-        elif isinstance(e, InputDataOngoingZeroFlatlinerError):
-            raise InputDataOngoingZeroFlatlinerError(
-                'All recent load measurements are zero. Check the load profile of this pid as well as related/neighbouring prediction jobs. Afterwards, consider adding this pid to the "known_zero_flatliners" app_setting and possibly removing other pids from the same app_setting.'
-            ) from e
-        elif isinstance(e, LookupError):
-            zero_flatliner_ongoing = detect_ongoing_zero_flatliner(
-                load=input_data.iloc[:, 0],
-                duration_threshold_minutes=pj.flatliner_threshold_minutes,
-            )
-            if zero_flatliner_ongoing:
-                raise LookupError(
-                    'Model not found. Consider checking for a zero flatliner and adding this pid to the "known_zero_flatliners" app_setting. For zero flatliners, no model can be trained.'
-                ) from e
-            else:
-                raise e
-    # Write forecast to the database
-    context.database.write_forecast(forecast, t_ahead_series=True)
-def main(model_type=None, config=None, database=None, **kwargs):
-    taskname = Path(__file__).name.replace(".py", "")
-    if database is None or config is None:
-        raise RuntimeError(
-            "Please specify a config object and/or database connection object. These"
-            " can be found in the openstef-dbc package."
-        )
-    with TaskContext(taskname, config, database) as context:
-        if model_type is None:
-            model_type = [ml.value for ml in ModelType]
-        PredictionJobLoop(context, model_type=model_type).map(
-            create_forecast_task, context, **kwargs
-        )
-if __name__ == "__main__":
-    main()

openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl