PyPI - openstef - Versions diffs - 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl - Mend

openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104)

openstef-4.0.0a3.dist-info/METADATA +177 -0
openstef-4.0.0a3.dist-info/RECORD +4 -0
{openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
openstef/__init__.py +0 -14
openstef/__main__.py +0 -3
openstef/app_settings.py +0 -19
openstef/data/NL_terrestrial_radiation.csv +0 -25585
openstef/data/NL_terrestrial_radiation.csv.license +0 -3
openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
openstef/data/dutch_holidays.csv +0 -1759
openstef/data/dutch_holidays.csv.license +0 -3
openstef/data/pv_single_coefs.csv +0 -601
openstef/data/pv_single_coefs.csv.license +0 -3
openstef/data_classes/__init__.py +0 -3
openstef/data_classes/data_prep.py +0 -99
openstef/data_classes/model_specifications.py +0 -30
openstef/data_classes/prediction_job.py +0 -135
openstef/data_classes/split_function.py +0 -97
openstef/enums.py +0 -140
openstef/exceptions.py +0 -74
openstef/feature_engineering/__init__.py +0 -3
openstef/feature_engineering/apply_features.py +0 -138
openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
openstef/feature_engineering/cyclic_features.py +0 -161
openstef/feature_engineering/data_preparation.py +0 -152
openstef/feature_engineering/feature_adder.py +0 -206
openstef/feature_engineering/feature_applicator.py +0 -202
openstef/feature_engineering/general.py +0 -141
openstef/feature_engineering/holiday_features.py +0 -231
openstef/feature_engineering/lag_features.py +0 -165
openstef/feature_engineering/missing_values_transformer.py +0 -141
openstef/feature_engineering/rolling_features.py +0 -58
openstef/feature_engineering/weather_features.py +0 -492
openstef/metrics/__init__.py +0 -3
openstef/metrics/figure.py +0 -303
openstef/metrics/metrics.py +0 -486
openstef/metrics/reporter.py +0 -222
openstef/model/__init__.py +0 -3
openstef/model/basecase.py +0 -82
openstef/model/confidence_interval_applicator.py +0 -242
openstef/model/fallback.py +0 -77
openstef/model/metamodels/__init__.py +0 -3
openstef/model/metamodels/feature_clipper.py +0 -90
openstef/model/metamodels/grouped_regressor.py +0 -222
openstef/model/metamodels/missing_values_handler.py +0 -138
openstef/model/model_creator.py +0 -214
openstef/model/objective.py +0 -426
openstef/model/objective_creator.py +0 -65
openstef/model/regressors/__init__.py +0 -3
openstef/model/regressors/arima.py +0 -197
openstef/model/regressors/custom_regressor.py +0 -64
openstef/model/regressors/dazls.py +0 -116
openstef/model/regressors/flatliner.py +0 -95
openstef/model/regressors/gblinear_quantile.py +0 -334
openstef/model/regressors/lgbm.py +0 -29
openstef/model/regressors/linear.py +0 -90
openstef/model/regressors/linear_quantile.py +0 -305
openstef/model/regressors/regressor.py +0 -114
openstef/model/regressors/xgb.py +0 -52
openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
openstef/model/regressors/xgb_quantile.py +0 -228
openstef/model/serializer.py +0 -431
openstef/model/standard_deviation_generator.py +0 -81
openstef/model_selection/__init__.py +0 -3
openstef/model_selection/model_selection.py +0 -311
openstef/monitoring/__init__.py +0 -3
openstef/monitoring/performance_meter.py +0 -92
openstef/monitoring/teams.py +0 -203
openstef/pipeline/__init__.py +0 -3
openstef/pipeline/create_basecase_forecast.py +0 -133
openstef/pipeline/create_component_forecast.py +0 -168
openstef/pipeline/create_forecast.py +0 -171
openstef/pipeline/optimize_hyperparameters.py +0 -317
openstef/pipeline/train_create_forecast_backtest.py +0 -163
openstef/pipeline/train_model.py +0 -561
openstef/pipeline/utils.py +0 -52
openstef/postprocessing/__init__.py +0 -3
openstef/postprocessing/postprocessing.py +0 -275
openstef/preprocessing/__init__.py +0 -3
openstef/preprocessing/preprocessing.py +0 -42
openstef/settings.py +0 -15
openstef/tasks/__init__.py +0 -3
openstef/tasks/calculate_kpi.py +0 -324
openstef/tasks/create_basecase_forecast.py +0 -118
openstef/tasks/create_components_forecast.py +0 -162
openstef/tasks/create_forecast.py +0 -145
openstef/tasks/create_solar_forecast.py +0 -420
openstef/tasks/create_wind_forecast.py +0 -80
openstef/tasks/optimize_hyperparameters.py +0 -135
openstef/tasks/split_forecast.py +0 -273
openstef/tasks/train_model.py +0 -224
openstef/tasks/utils/__init__.py +0 -3
openstef/tasks/utils/dependencies.py +0 -107
openstef/tasks/utils/predictionjobloop.py +0 -243
openstef/tasks/utils/taskcontext.py +0 -160
openstef/validation/__init__.py +0 -3
openstef/validation/validation.py +0 -322
openstef-3.4.56.dist-info/METADATA +0 -154
openstef-3.4.56.dist-info/RECORD +0 -102
openstef-3.4.56.dist-info/top_level.txt +0 -1
/openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0

openstef/pipeline/create_basecase_forecast.py DELETED Viewed

@@ -1,133 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-import logging
-from pathlib import Path
-import pandas as pd
-import structlog
-from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.exceptions import InputDataOngoingZeroFlatlinerError, NoRealisedLoadError
-from openstef.feature_engineering.feature_applicator import (
-    OperationalPredictFeatureApplicator,
-)
-from openstef.model.basecase import BaseCaseModel
-from openstef.model.confidence_interval_applicator import ConfidenceIntervalApplicator
-from openstef.pipeline.utils import generate_forecast_datetime_range
-from openstef.postprocessing.postprocessing import (
-    add_components_base_case_forecast,
-    add_prediction_job_properties_to_forecast,
-)
-from openstef.settings import Settings
-from openstef.validation import validation
-MODEL_LOCATION = Path(".")
-BASECASE_HORIZON_MINUTES = 60 * 24 * 14  # 14 days ahead
-BASECASE_RESOLUTION_MINUTES = 15
-def create_basecase_forecast_pipeline(
-    pj: PredictionJobDataClass,
-    input_data: pd.DataFrame,
-) -> pd.DataFrame:
-    """Compute the base case forecast and confidence intervals for a given prediction job and input data.
-    Args:
-        pj: Prediction job
-        input_data: data frame containing the input data necessary for the prediction.
-    Returns:
-        Base case forecast
-    Raises:
-        NoRealisedLoadError: When no realised load for given datetime range.
-    """
-    structlog.configure(
-        wrapper_class=structlog.make_filtering_bound_logger(
-            logging.getLevelName(Settings.log_level)
-        )
-    )
-    logger = structlog.get_logger(__name__)
-    logger.info("Preprocessing data for basecase forecast")
-    forecast_start, forecast_end = generate_forecast_datetime_range(input_data)
-    if not isinstance(input_data.index, pd.DatetimeIndex):
-        raise ValueError("Input dataframe does not have a datetime index.")
-    zero_flatliner_ongoing = validation.detect_ongoing_zero_flatliner(
-        load=input_data.iloc[:, 0],
-        duration_threshold_minutes=pj.flatliner_threshold_minutes,
-    )
-    if zero_flatliner_ongoing:
-        # Set historic load to zero to force the basecase forecasts to be zero.
-        input_data.loc[input_data.index < forecast_start, "load"] = 0
-    # Add features
-    data_with_features = OperationalPredictFeatureApplicator(
-        horizons=[0.25],
-        feature_names=[
-            "T-7d",
-            "T-14d",
-        ],  # Generate features for load 7 days ago and load 14 days ago these are the same as the basecase forecast.
-    ).add_features(input_data)
-    forecast_input = data_with_features[forecast_start:forecast_end]
-    # Initialize model
-    model = BaseCaseModel()
-    logger.info("Making basecase forecast")
-    # Make basecase forecast
-    basecase_forecast = BaseCaseModel().predict(forecast_input)
-    # Check if input data is available
-    if len(basecase_forecast) == 0:
-        raise NoRealisedLoadError(pj["id"])
-    # Estimate the stdev by using the stdev of the hour for historic (T-14d) load
-    model.standard_deviation = generate_basecase_confidence_interval(forecast_input)
-    logger.info("Postprocessing basecase forecast")
-    # Apply confidence interval
-    basecase_forecast = ConfidenceIntervalApplicator(
-        model, forecast_input
-    ).add_confidence_interval(basecase_forecast, pj)
-    # Add basecase for the component forecasts
-    basecase_forecast = add_components_base_case_forecast(basecase_forecast)
-    # Do further postprocessing
-    basecase_forecast = add_prediction_job_properties_to_forecast(
-        pj=pj,
-        forecast=basecase_forecast,
-        algorithm_type="basecase_lastweek",
-        forecast_quality="not_renewed",
-    )
-    return basecase_forecast
-def generate_basecase_confidence_interval(
-    data_with_features: pd.DataFrame,
-) -> pd.DataFrame:
-    """Calculate confidence interval for a basecase forecast.
-    Args:
-        data_with_features: Input dataframe that is used to make the basecase forecast.
-    Returns:
-        Dataframe with the confidence interval.
-    """
-    confidence_interval = (
-        data_with_features[["T-14d"]]  # Select only the T-14d column as a DataFrame
-        .groupby(data_with_features.index.hour)  # Get the std for every hour
-        .std()
-        .rename(columns={"T-14d": "stdev"})  # Rename the column to stdev
-    )
-    confidence_interval["hour"] = confidence_interval.index
-    confidence_interval["horizon"] = 48
-    return confidence_interval

openstef/pipeline/create_component_forecast.py DELETED Viewed

@@ -1,168 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-import logging
-import joblib
-import numpy as np
-import pandas as pd
-import structlog
-import openstef.postprocessing.postprocessing as postprocessing
-from openstef import PROJECT_ROOT
-from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.enums import ForecastType
-from openstef.model.regressors.dazls import Dazls
-from openstef.settings import Settings
-# Set the path for the Dazls stored model
-DAZLS_STORED = str(
-    PROJECT_ROOT / "openstef" / "data" / "dazls_model_3.4.24" / "dazls_stored_3.4.24_"
-)
-def create_input(
-    pj: PredictionJobDataClass, input_data: pd.DataFrame, weather_data: pd.DataFrame
-) -> pd.DataFrame:
-    """This function prepares the input data.
-    This data will be used for the Dazls model prediction, so they will be
-    according Dazls model requirements.
-    Args:
-        pj: Prediction job
-        input_data: Input forecast for the components forecast.
-        weather_data: Weather data with 'radiation' and 'windspeed_100m' columns
-    Returns:
-        It outputs a dataframe which will be used for the Dazls prediction function.
-    """
-    # Prepare raw input data
-    input_df = (
-        weather_data[["radiation", "windspeed_100m"]]
-        .merge(
-            input_data[["forecast"]].rename(columns={"forecast": "total_load"}),
-            how="inner",
-            right_index=True,
-            left_index=True,
-        )
-        .dropna()
-    )
-    # Add additional features
-    input_df["lat"] = pj["lat"]
-    input_df["lon"] = pj["lon"]
-    input_df["solar_on"] = 1
-    input_df["wind_on"] = 1
-    input_df["hour"] = input_df.index.hour
-    input_df["minute"] = input_df.index.minute
-    input_df["var0"] = input_df["total_load"].var()
-    input_df["var1"] = input_df["radiation"].var()
-    input_df["var2"] = input_df["windspeed_100m"].var()
-    input_df["sem0"] = input_df["total_load"].sem()
-    input_df["sem1"] = input_df["radiation"].sem()
-    input_df["sem2"] = input_df["windspeed_100m"].sem()
-    # Features for the new model
-    # Periodic Month feature
-    c = (1 / 11) * np.pi - (1 / 365)
-    n = np.array(input_df.index.month, dtype=float)
-    input_df["month_ff"] = np.sin(c * (n - 1))
-    return input_df
-def create_components_forecast_pipeline(
-    pj: PredictionJobDataClass, input_data: pd.DataFrame, weather_data: pd.DataFrame
-) -> pd.DataFrame:
-    """Pipeline for creating a component forecast using Dazls prediction model.
-    Args:
-        pj: Prediction job
-        input_data: Input forecast for the components forecast.
-        weather_data: Weather data with 'radiation' and 'windspeed_100m' columns
-    Returns:
-        DataFrame with component forecasts. The dataframe contains these columns;
-        "forecast_wind_on_shore",
-        "forecast_solar",
-        "forecast_other",
-        "pid",
-        "customer",
-        "description",
-        "type",
-        "algtype"
-    """
-    structlog.configure(
-        wrapper_class=structlog.make_filtering_bound_logger(
-            logging.getLevelName(Settings.log_level)
-        )
-    )
-    logger = structlog.get_logger(__name__)
-    logger.info("Make components prediction", pid=pj["id"])
-    # Make component forecasts
-    try:
-        dazls_input_data = create_input(pj, input_data, weather_data)
-        # Save and load the model as .sav file (or as .z file)
-        # For the code contact: korte.termijn.prognoses@alliander.com
-        dazls_model = Dazls()
-        dazls_model.model_ = joblib.load(DAZLS_STORED + "baseline_model.z")
-        logger.info("DAZLS model loaded", dazls_model=str(dazls_model))
-        # Use the predict function of Dazls model
-        # As input data we use the input_data function which takes into consideration what we want as an input for the forecast and what Dazls can accept as an input
-        forecasts = dazls_model.predict(x=dazls_input_data)
-        # Set the columns for the output forecast dataframe
-        forecasts = pd.DataFrame(
-            forecasts,
-            columns=["forecast_wind_on_shore", "forecast_solar"],
-            index=dazls_input_data.index,
-        )
-        # Make post-processed forecasts for solar and wind power
-        # These forecasts are respectively for the components: "forecast_solar" and "forecast_wind_on_shore"
-        # The outcome forecasts are added in the "forecasts" DataFrame we created above
-        forecasts["forecast_solar"] = postprocessing.post_process_wind_solar(
-            forecasts["forecast_solar"], forecast_type=ForecastType.SOLAR
-        )
-        forecasts["forecast_wind_on_shore"] = postprocessing.post_process_wind_solar(
-            forecasts["forecast_wind_on_shore"], forecast_type=ForecastType.WIND
-        )
-        # Make forecast for the component: "forecast_other"
-        forecasts["forecast_other"] = (
-            dazls_input_data["total_load"]
-            - forecasts["forecast_solar"]
-            - forecasts["forecast_wind_on_shore"]
-        )
-        # Make sure the forecasts have the same form as the input data. Pad with 0 if necessary
-        forecasts = forecasts.reindex(index=input_data.index, fill_value=0)
-    except Exception as e:
-        # In case something goes wrong we fall back on an a zero-filled dataframe
-        logger.warning(
-            f"Could not make component forecasts: {e}, falling back on series of"
-            " zeros!",
-            exc_info=e,
-        )
-        forecasts = pd.DataFrame(
-            data=0,
-            index=input_data.index,
-            columns=["forecast_wind_on_shore", "forecast_solar", "forecast_other"],
-        )
-    # Prepare for output
-    # Add more prediction properties to the forecast ("pid","customer","description","type","algtype)
-    forecasts = postprocessing.add_prediction_job_properties_to_forecast(
-        pj, forecasts, algorithm_type="component"
-    )
-    return forecasts

openstef/pipeline/create_forecast.py DELETED Viewed

@@ -1,171 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-import logging
-import pandas as pd
-import structlog
-from openstef.data_classes.model_specifications import ModelSpecificationDataClass
-from openstef.data_classes.prediction_job import PredictionJobDataClass
-from openstef.feature_engineering.feature_applicator import (
-    OperationalPredictFeatureApplicator,
-)
-from openstef.model.confidence_interval_applicator import ConfidenceIntervalApplicator
-from openstef.model.fallback import generate_fallback
-from openstef.model.regressors.regressor import OpenstfRegressor
-from openstef.model.serializer import MLflowSerializer
-from openstef.pipeline.utils import generate_forecast_datetime_range
-from openstef.postprocessing.postprocessing import (
-    add_prediction_job_properties_to_forecast,
-    sort_quantiles,
-)
-from openstef.settings import Settings
-from openstef.validation import validation
-def create_forecast_pipeline(
-    pj: PredictionJobDataClass,
-    input_data: pd.DataFrame,
-    mlflow_tracking_uri: str,
-) -> pd.DataFrame:
-    """Create forecast pipeline.
-    This is the top-level pipeline which included loading the most recent model for
-    the given prediction job.
-    Expected prediction job keys: "id",
-    Args:
-        pj: Prediction job
-        input_data: Training input data (without features)
-        mlflow_tracking_uri: MlFlow tracking URI
-    Returns:
-        DataFrame with the forecast
-    Raises:
-        InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
-        LookupError: When no model is found for the given prediction job in MLflow.
-    """
-    prediction_model_pid = pj["id"]
-    # Use the alternative forecast model if it's specify in the pj
-    if pj.alternative_forecast_model_pid:
-        prediction_model_pid = pj.alternative_forecast_model_pid
-    # Load most recent model for the given pid
-    model, model_specs = MLflowSerializer(
-        mlflow_tracking_uri=mlflow_tracking_uri
-    ).load_model(experiment_name=str(prediction_model_pid))
-    return create_forecast_pipeline_core(pj, input_data, model, model_specs)
-def create_forecast_pipeline_core(
-    pj: PredictionJobDataClass,
-    input_data: pd.DataFrame,
-    model: OpenstfRegressor,
-    model_specs: ModelSpecificationDataClass,
-) -> pd.DataFrame:
-    """Create forecast pipeline (core).
-    Computes the forecasts and confidence intervals given a prediction job and input data.
-    This pipeline has no database or persisitent storage dependencies.
-    Expected prediction job keys: "resolution_minutes", "id", "type",
-        "name", "quantiles"
-    Args:
-        pj: Prediction job.
-        input_data: Input data for the prediction.
-        model: Model to use for this prediction.
-        model_specs: Model specifications.
-    Returns:
-        Forecast
-    Raises:
-        InputDataOngoingZeroFlatlinerError: When all recent load measurements are zero.
-    """
-    structlog.configure(
-        wrapper_class=structlog.make_filtering_bound_logger(
-            logging.getLevelName(Settings.log_level)
-        )
-    )
-    logger = structlog.get_logger(__name__)
-    fallback_strategy = "extreme_day"  # this can later be expanded
-    # Validate and clean data
-    validated_data = validation.validate(
-        pj["id"],
-        input_data,
-        pj["flatliner_threshold_minutes"],
-        pj["resolution_minutes"],
-    )
-    # Custom data prep or legacy behavior
-    if pj.data_prep_class:
-        data_prep_class, data_prep_args = pj.data_prep_class.load()
-        forecast_input_data, data_with_features = data_prep_class(
-            pj=pj,
-            model_specs=model_specs,
-            model=model,
-            **data_prep_args,
-        ).prepare_forecast_data(validated_data)
-    else:
-        # Add features
-        data_with_features = OperationalPredictFeatureApplicator(
-            horizons=[pj["resolution_minutes"] / 60.0],
-            feature_names=model.feature_names,
-            feature_modules=model_specs.feature_modules,
-        ).add_features(validated_data)
-        # Prep forecast input by selecting only the forecast datetime interval (this is much smaller than the input range)
-        # Also drop the load column
-        forecast_start, forecast_end = generate_forecast_datetime_range(
-            data_with_features
-        )
-        forecast_input_data = data_with_features[forecast_start:forecast_end].drop(
-            columns="load"
-        )
-    # Check if sufficient data is left after cleaning
-    if not validation.is_data_sufficient(
-        data_with_features,
-        pj["completeness_threshold"],
-        pj["minimal_table_length"],
-        model,
-    ):
-        logger.warning(
-            "Using fallback forecast",
-            forecast_type="fallback",
-            pid=pj["id"],
-            fallback_strategy=fallback_strategy,
-        )
-        forecast = generate_fallback(data_with_features, input_data[["load"]])
-    else:
-        # Predict
-        model_forecast = model.predict(forecast_input_data)
-        forecast = pd.DataFrame(
-            index=forecast_input_data.index, data={"forecast": model_forecast}
-        )
-    # Add confidence
-    forecast = ConfidenceIntervalApplicator(
-        model, forecast_input_data
-    ).add_confidence_interval(forecast, pj)
-    # Sort quantiles - prevents crossing and is statistically sound
-    forecast = sort_quantiles(forecast)
-    # Prepare for output
-    forecast = add_prediction_job_properties_to_forecast(
-        pj,
-        forecast,
-        algorithm_type=str(model.path),
-    )
-    return forecast

openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl