PyPI - openstef - Versions diffs - 3.4.60__tar.gz → 3.4.62__tar.gz - Mend

openstef 3.4.60 (tar.gz) → 3.4.62 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109)

{openstef-3.4.60 → openstef-3.4.62}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: openstef
-Version: 3.4.60
+Version: 3.4.62
 Summary: Open short term energy forecaster
 Home-page: https://github.com/OpenSTEF/openstef
 Author: Alliander N.V

{openstef-3.4.60 → openstef-3.4.62}/openstef/data_classes/data_prep.py RENAMED Viewed

@@ -2,12 +2,13 @@
 #
 # SPDX-License-Identifier: MPL-2.0
 """Specifies the split function dataclass."""
 import inspect
 import json
 from importlib import import_module
 from typing import Any, Sequence, TypeVar, Union
-from pydantic.v1 import BaseModel
+from pydantic import BaseModel, Field
 DataPrepClass = TypeVar("DataPrepClass")
@@ -15,10 +16,14 @@ DataPrepClass = TypeVar("DataPrepClass")
 class DataPrepDataClass(BaseModel):
     """Class that allows to specify a custom class to prepare the data (feature engineering , etc ...)."""
-    klass: Union[str, type[DataPrepClass]]
-    arguments: Union[
-        str, dict[str, Any]
-    ]  # JSON string holding the function parameters or dict
+    klass: Union[str, type[DataPrepClass]] = Field(
+        ...,
+        description="The class that should be used to prepare the data. Can be a string with the path to the class or the class itself.",
+    )
+    arguments: Union[str, dict[str, Any]] = Field(
+        default=None,
+        description="The arguments that should be passed to the class. Can be a JSON string holding the function parameters or dict.",
+    )
     def __getitem__(self, key: str):
         """Allows us to use subscription to get the items from the object."""

{openstef-3.4.60 → openstef-3.4.62}/openstef/data_classes/model_specifications.py RENAMED Viewed

@@ -2,27 +2,31 @@
 #
 # SPDX-License-Identifier: MPL-2.0
 """Specifies the dataclass for model specifications."""
-from typing import Optional, Union
-from pydantic.v1 import BaseModel
+from typing import Any, Optional, Union
+from pydantic import BaseModel, Field
 class ModelSpecificationDataClass(BaseModel):
     """Holds all information regarding the training procces of a specific model."""
-    id: Union[int, str]
-    hyper_params: Optional[dict] = {}
-    """Hyperparameters that should be used during training."""
-    feature_names: Optional[list] = None
-    """Features that should be used during training."""
-    feature_modules: Optional[list] = []
-    """Feature modules that should be used during training."""
+    id: Union[int, str] = Field(description="The model id.")
+    hyper_params: Optional[dict] = Field(
+        default={}, description="Hyperparameters that should be used during training."
+    )
+    feature_names: Optional[list] = Field(
+        default=None, description="Features that should be used during training."
+    )
+    feature_modules: Optional[list] = Field(
+        default=[], description="Modules that should be used during training."
+    )
-    def __getitem__(self, item: str) -> any:
+    def __getitem__(self, item: str) -> Any:
         """Allows us to use subscription to get the items from the object."""
         return getattr(self, item)
-    def __setitem__(self, key: str, value: any) -> None:
+    def __setitem__(self, key: str, value: Any) -> None:
         """Allows us to use subscription to set the items in the object."""
         if hasattr(self, key):
             self.__dict__[key] = value

openstef-3.4.62/openstef/data_classes/prediction_job.py ADDED Viewed

@@ -0,0 +1,151 @@
+# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
+#
+# SPDX-License-Identifier: MPL-2.0
+"""Specifies the prediction job dataclass."""
+from typing import Any, Optional, Union
+from pydantic import BaseModel, Field
+from openstef.data_classes.data_prep import DataPrepDataClass
+from openstef.data_classes.model_specifications import ModelSpecificationDataClass
+from openstef.data_classes.split_function import SplitFuncDataClass
+from openstef.enums import AggregateFunction, BiddingZone, PipelineType
+class PredictionJobDataClass(BaseModel):
+    """Holds all information about the specific forecast that has to be made."""
+    id: Union[int, str] = Field(
+        ..., description="The predictions job id (often abreviated as pid)."
+    )
+    model: str = Field(
+        ...,
+        description="The model type that should be used. Options are: 'xgb', 'xgb_quantile', 'lgb', 'linear', 'linear_quantile', 'gblinear_quantile', 'xgb_multioutput_quantile', 'flatliner'.",
+    )
+    model_kwargs: Optional[dict] = Field(
+        default=None, description="The model parameters that should be used."
+    )
+    forecast_type: str = Field(
+        ...,
+        description="The type of forecasts that should be made. Options are: 'demand', 'wind', 'basecase'. If unsure what to pick, choose 'demand'.",
+    )
+    horizon_minutes: Optional[int] = Field(
+        2880,
+        description="The horizon of the desired forecast in minutes used in tasks. Defaults to 2880 minutes (i.e. 2 days).",
+    )
+    resolution_minutes: int = Field(
+        60, description="The resolution of the desired forecast in minutes."
+    )
+    lat: Optional[float] = Field(
+        52.132633,
+        description="Latitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derrived features and component splitting.",
+    )
+    lon: Optional[float] = Field(
+        5.291266,
+        description="Longitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derrived features and component splitting.",
+    )
+    name: str = Field(..., description="Name of the forecast, e.g. the location name.")
+    electricity_bidding_zone: Optional[BiddingZone] = Field(
+        BiddingZone.NL,
+        description="The bidding zone of the forecasted location. Used for fetching electricity prices in tasks. It is also used to determine the holidays that should be used. Currently only ENTSO-E bidding zones are supported.",
+    )
+    train_components: Optional[bool] = Field(
+        None,
+        description="Whether splitting the forecasts in wind, solar, rest is desired.",
+    )
+    description: Optional[str] = Field(
+        None,
+        description="Optional description of the prediction job for human reference.",
+    )
+    quantiles: Optional[list[float]] = Field(
+        None,
+        description="Quantiles that have to be forecasted. Only used for quantile models.",
+    )
+    train_split_func: Optional[SplitFuncDataClass] = Field(
+        None, description="Optional custom splitting function for operational procces."
+    )
+    backtest_split_func: Optional[SplitFuncDataClass] = Field(
+        None, description="Optional custom splitting function for backtesting."
+    )
+    train_horizons_minutes: Optional[list[int]] = Field(
+        None,
+        description="List of horizons that should be taken into account during training.",
+    )
+    default_modelspecs: Optional[ModelSpecificationDataClass] = Field(
+        None, description="Default model specifications"
+    )
+    save_train_forecasts: bool = Field(
+        False,
+        description="Indicate wether the forecasts produced during the training process should be saved.",
+    )
+    completeness_threshold: float = Field(
+        0.5,
+        description="Minimum fraction of data that should be available for making a regular forecast.",
+    )
+    minimal_table_length: int = Field(
+        100,
+        description="Minimum length (in rows) of the forecast input for making a regular forecast.",
+    )
+    flatliner_threshold_minutes: int = Field(
+        1440,
+        description="Number of minutes that the load has to be constant to detect a flatliner.",
+    )
+    data_balancing_ratio: Optional[float] = Field(
+        None,
+        description="If data balancing is enabled, the data will be balanced with data from 1 year ago in the future.",
+    )
+    rolling_aggregate_features: Optional[list[AggregateFunction]] = Field(
+        [],
+        description="If not None, rolling aggregate(s) of load will be used as features in the model.",
+    )
+    depends_on: Optional[list[Union[int, str]]] = Field(
+        [],
+        description="Link to another prediction job on which this prediction job might depend.",
+    )
+    sid: Optional[str] = Field(
+        None, description="Only required for create_solar_forecast task"
+    )
+    turbine_type: Optional[str] = Field(
+        None, description="Only required for create_wind_forecast task"
+    )
+    n_turbines: Optional[float] = Field(
+        None, description="Only required for create_wind_forecast task"
+    )
+    hub_height: Optional[float] = Field(
+        None, description="Only required for create_wind_forecast task"
+    )
+    pipelines_to_run: list[PipelineType] = Field(
+        [PipelineType.TRAIN, PipelineType.HYPER_PARMATERS, PipelineType.FORECAST],
+        description="The pipelines to run for this pj",
+    )
+    alternative_forecast_model_pid: Optional[Union[int, str]] = Field(
+        None,
+        description="The pid that references another prediction job from which the model should be used for making forecasts.",
+    )
+    data_prep_class: Optional[DataPrepDataClass] = Field(
+        None, description="The import string for the custom data prep class"
+    )
+    def __getitem__(self, item: str) -> Any:
+        """Allows us to use subscription to get the items from the object."""
+        return getattr(self, item)
+    def __setitem__(self, key: str, value: Any) -> None:
+        """Allows us to use subscription to set the items in the object."""
+        if hasattr(self, key):
+            self.__dict__[key] = value
+        else:
+            raise AttributeError(f"{key} not an attribute of prediction job.")
+    def get(self, key: str, default: Any = None) -> Any:
+        """Allows to use the get functions similar to a python dict."""
+        if hasattr(self, key):
+            return getattr(self, key)
+        else:
+            return default

{openstef-3.4.60 → openstef-3.4.62}/openstef/data_classes/split_function.py RENAMED Viewed

@@ -2,27 +2,28 @@
 #
 # SPDX-License-Identifier: MPL-2.0
 """Specifies the split function dataclass."""
 import inspect
 import json
 from importlib import import_module
 from typing import Any, Callable, Sequence, Union
-from pydantic.v1 import BaseModel
+from pydantic import BaseModel, Field
 class SplitFuncDataClass(BaseModel):
     """Class that allows to specify a custom function to generate a train, test and validation set."""
-    function: Union[str, Callable]
-    arguments: Union[
-        str, dict[str, Any]
-    ]  # JSON string holding the function parameters or dict
+    function: Union[str, Callable] = Field(..., description="The split function")
+    arguments: Union[str, dict[str, Any]] = Field(
+        ..., description="JSON string holding the function parameters or dict"
+    )
     def __getitem__(self, key: str):
         """Allows us to use subscription to get the items from the object."""
         return getattr(self, key)
-    def __setitem__(self, key: str, value: any):
+    def __setitem__(self, key: str, value: Any):
         """Allows us to use subscription to set the items in the object."""
         if hasattr(self, key):
             self.__dict__[key] = value

{openstef-3.4.60 → openstef-3.4.62}/openstef/feature_engineering/weather_features.py RENAMED Viewed

@@ -397,7 +397,6 @@ def calculate_dni(radiation: pd.Series, pj: PredictionJobDataClass) -> pd.Series
     solar_zenith = solpos.apparent_zenith
     # convert radiation (ghi) to right unit (J/m^2 to kWh/m^2)
-    # TODO: check whether unit conversion is necessary
     ghi_forecasted = radiation / 3600
     # convert ghi to dni
     dni_converted = pvlib.irradiance.dni(

{openstef-3.4.60 → openstef-3.4.62}/openstef/metrics/metrics.py RENAMED Viewed

@@ -9,7 +9,7 @@
 #
 # SPDX-License-Identifier: MIT
 """This module contains all metrics to assess forecast quality."""
-from typing import Callable
+from typing import Callable, Optional, Tuple
 import numpy as np
 import pandas as pd
@@ -299,12 +299,15 @@ def skill_score_positive_peaks(
 def franks_skill_score(
-    realised: pd.Series, forecast: pd.Series, basecase: pd.Series, range_: float = 1.0
+    realised: pd.Series,
+    forecast: pd.Series,
+    basecase: pd.Series,
+    range_: Optional[float] = None,
 ) -> float:
     """Calculate Franks skill score."""
     # Combine series in one DataFrame
     combined = pd.concat([realised, forecast], axis=1)
-    if range_ == 1.0:
+    if not range_:
         range_ = (
             combined[realised.name].max() - combined[realised.name].min()
             if (combined[realised.name].max() - combined[realised.name].min()) != 0
@@ -360,7 +363,7 @@ def franks_skill_score_peaks(
 def xgb_quantile_eval(
     preds: np.ndarray, dmatrix: xgboost.DMatrix, quantile: float = 0.2
-) -> str:
+) -> Tuple:
     """Customized evaluational metric that equals to quantile regression loss (also known as pinball loss).
     Quantile regression is regression that estimates a specified quantile of target's distribution conditional on given features.

{openstef-3.4.60 → openstef-3.4.62}/openstef/model/confidence_interval_applicator.py RENAMED Viewed

@@ -137,7 +137,7 @@ class ConfidenceIntervalApplicator:
             # Determine now, rounded on 15 minutes,
             # Rounding helps to prevent fractional t_aheads
             now = (
-                pd.Series(datetime.utcnow().replace(tzinfo=forecast_copy.index.tzinfo))
+                pd.Series(datetime.now(tz=forecast_copy.index.tzinfo))
                 .min()
                 .round(f"{minimal_resolution}T")
                 .to_pydatetime()

{openstef-3.4.60 → openstef-3.4.62}/openstef/model/fallback.py RENAMED Viewed

@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
 #
 # SPDX-License-Identifier: MPL-2.0
-from datetime import datetime
+from datetime import datetime, UTC
 import pandas as pd
@@ -43,9 +43,7 @@ def generate_fallback(
         # Find most extreme historic day (do not count today as it is incomplete)
         day_with_highest_load_date = (
-            load[load.index.tz_localize(None).date != datetime.utcnow().date()]
-            .idxmax()
-            .load.date()
+            load[load.index < datetime.now(tz=UTC)].idxmax().load.date()
         )
         # generate datetime range of the day with the highest load
         from_datetime = pd.Timestamp(day_with_highest_load_date, tz=load.index.tz)

{openstef-3.4.60 → openstef-3.4.62}/openstef/model/metamodels/missing_values_handler.py RENAMED Viewed

@@ -90,7 +90,7 @@ class MissingValuesHandler(BaseEstimator, RegressorMixin, MetaEstimatorMixin):
     def fit(self, x, y):
         """Fit model."""
         _, y = check_X_y(x, y, force_all_finite="allow-nan", y_numeric=True)
-        if type(x) != pd.DataFrame:
+        if not isinstance(x, pd.DataFrame):
             x = pd.DataFrame(np.asarray(x))
         self.feature_in_names_ = list(x.columns)
         self.n_features_in_ = x.shape[1]
@@ -133,6 +133,6 @@ class MissingValuesHandler(BaseEstimator, RegressorMixin, MetaEstimatorMixin):
             x,
             force_all_finite="allow-nan",
         )
-        if type(x) != pd.DataFrame:
+        if not isinstance(x, pd.DataFrame):
             x = pd.DataFrame(np.array(x))
         return self.pipeline_.predict(x[self.non_null_columns_])

{openstef-3.4.60 → openstef-3.4.62}/openstef/model/regressors/custom_regressor.py RENAMED Viewed

@@ -26,9 +26,9 @@ class CustomOpenstfRegressor(OpenstfRegressor):
     def valid_kwargs() -> list[str]:
         ...
-    @classmethod
+    @staticmethod
     @abstractmethod
-    def objective(self) -> Type[RegressorObjective]:
+    def objective() -> Type[RegressorObjective]:
         ...

{openstef-3.4.60 → openstef-3.4.62}/openstef/model/serializer.py RENAMED Viewed

@@ -5,7 +5,7 @@ import json
 import logging
 import os
 import shutil
-from datetime import datetime
+from datetime import datetime, UTC
 from json import JSONDecodeError
 from typing import Optional, Union
 from urllib.parse import unquote, urlparse
@@ -283,8 +283,7 @@ class MLflowSerializer:
         """Determines how many days ago a model is trained from the mlflow run."""
         try:
             model_datetime = run.end_time.to_pydatetime()
-            model_datetime = model_datetime.replace(tzinfo=None)
-            model_age_days = (datetime.utcnow() - model_datetime).days
+            model_age_days = (datetime.now(tz=UTC) - model_datetime).days
         except Exception as e:
             self.logger.warning(
                 "Could not get model age. Returning infinite age!", exception=str(e)

{openstef-3.4.60 → openstef-3.4.62}/openstef/model/standard_deviation_generator.py RENAMED Viewed

@@ -69,7 +69,7 @@ class StandardDeviationGenerator:
         # Calculate the error for each predicted point
         error = realised - predicted
         error.index = error.index.hour  # Hour only, remove the rest
-        # For the time starts with 00, 01, 02, etc. TODO (MAKE MORE ELEGANT SOLUTION THAN A LOOP)
+        # For the time starts with 00, 01, 02, etc.
         for hour in range(24):
             hour_error = error[error.index == hour]

{openstef-3.4.60 → openstef-3.4.62}/openstef/model_selection/model_selection.py RENAMED Viewed

@@ -230,7 +230,9 @@ def split_data_train_validation_test(
         for date_set in [max_dates, min_dates, other_dates]:
             n_days_val = max(1, int(validation_fraction * len(date_set)))
             val_dates += list(
-                np.random.choice(list(date_set), n_days_val, replace=False)
+                np.random.default_rng().choice(
+                    list(date_set), n_days_val, replace=False
+                )
             )
             train_dates += [x for x in date_set if x not in val_dates]

{openstef-3.4.60 → openstef-3.4.62}/openstef/postprocessing/postprocessing.py RENAMED Viewed

@@ -239,11 +239,6 @@ def add_prediction_job_properties_to_forecast(
     if forecast_quality is not None:
         forecast["quality"] = forecast_quality
-    # TODO rename prediction job typ to type
-    # TODO algtype = model_file_path, perhaps we can find a more logical name
-    # TODO perhaps better to make a forecast its own class!
-    # TODO double check and sync this with make_basecase_forecast (other fields are added)
-    # !!!!! TODO fix the requirement for customer
     forecast["pid"] = pj["id"]
     forecast["customer"] = pj["name"]
     forecast["description"] = pj["description"]

{openstef-3.4.60 → openstef-3.4.62}/openstef/tasks/calculate_kpi.py RENAMED Viewed

@@ -21,7 +21,7 @@ Example:
 import logging
 # Import builtins
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, UTC
 from pathlib import Path
 import numpy as np
@@ -56,8 +56,8 @@ def main(model_type: ModelType = None, config=None, database=None) -> None:
     with TaskContext(taskname, config, database) as context:
         # Set start and end time
-        start_time = datetime.utcnow() - timedelta(days=1)
-        end_time = datetime.utcnow()
+        end_time = datetime.now(tz=UTC)
+        start_time = end_time - timedelta(days=1)
         PredictionJobLoop(context, model_type=model_type).map(
             check_kpi_task,
@@ -77,9 +77,9 @@ def check_kpi_task(
 ) -> None:
     # Apply default parameters if none are provided
     if start_time is None:
-        start_time = datetime.utcnow() - timedelta(days=1)
+        start_time = datetime.now(tz=UTC) - timedelta(days=1)
     if end_time is None:
-        end_time = datetime.utcnow()
+        end_time = datetime.now(tz=UTC)
     # Get realised load data
     realised = context.database.get_load_pid(pj["id"], start_time, end_time, "15T")

{openstef-3.4.60 → openstef-3.4.62}/openstef/tasks/create_basecase_forecast.py RENAMED Viewed

@@ -16,7 +16,7 @@ Example:
         $ python create_basecase_forecast.py
 """
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, UTC
 from pathlib import Path
 import pandas as pd
@@ -68,8 +68,8 @@ def create_basecase_forecast_task(
         return
     # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
-    datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
+    datetime_start = datetime.now(tz=UTC) - timedelta(days=t_behind_days)
+    datetime_end = datetime.now(tz=UTC) + timedelta(days=t_ahead_days)
     # Retrieve input data
     input_data = context.database.get_model_input(
@@ -87,7 +87,7 @@ def create_basecase_forecast_task(
     basecase_forecast = basecase_forecast.loc[
         basecase_forecast.index
         > (
-            pd.to_datetime(datetime.utcnow(), utc=True)
+            pd.to_datetime(datetime.now(tz=UTC), utc=True)
             + timedelta(minutes=pj.horizon_minutes)
         ),
         :,

{openstef-3.4.60 → openstef-3.4.62}/openstef/tasks/create_components_forecast.py RENAMED Viewed

@@ -22,7 +22,7 @@ Example:
 """
 import logging
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timedelta, UTC
 from pathlib import Path
 import pandas as pd
@@ -76,8 +76,8 @@ def create_components_forecast_task(
         return
     # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
-    datetime_end = datetime.utcnow() + timedelta(days=t_ahead_days)
+    datetime_start = datetime.now(tz=UTC) - timedelta(days=t_behind_days)
+    datetime_end = datetime.now(tz=UTC) + timedelta(days=t_ahead_days)
     logger.info(
         "Get predicted load", datetime_start=datetime_start, datetime_end=datetime_end
@@ -120,9 +120,7 @@ def create_components_forecast_task(
     logger.debug("Written forecast to database")
     # Check if forecast was complete enough, otherwise raise exception
-    if forecasts.index.max() < datetime.utcnow().replace(
-        tzinfo=timezone.utc
-    ) + timedelta(hours=30):
+    if forecasts.index.max() < datetime.now(tz=UTC) + timedelta(hours=30):
         # Check which input data is missing the most.
         # Do this by counting the NANs for (load)forecast, radiation and windspeed
         max_index = forecasts.index.max()

{openstef-3.4.60 → openstef-3.4.62}/openstef/tasks/create_forecast.py RENAMED Viewed

@@ -20,7 +20,7 @@ Example:
         $ python create_forecast.py
 """
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, UTC
 from pathlib import Path
 from openstef.data_classes.prediction_job import PredictionJobDataClass
@@ -73,8 +73,8 @@ def create_forecast_task(
     mlflow_tracking_uri = context.config.paths_mlflow_tracking_uri
     # Define datetime range for input data
-    datetime_start = datetime.utcnow() - timedelta(days=t_behind_days)
-    datetime_end = datetime.utcnow() + timedelta(seconds=pj.horizon_minutes * 60)
+    datetime_start = datetime.now(tz=UTC) - timedelta(days=t_behind_days)
+    datetime_end = datetime.now(tz=UTC) + timedelta(seconds=pj.horizon_minutes * 60)
     # Retrieve input data
     input_data = context.database.get_model_input(

{openstef-3.4.60 → openstef-3.4.62}/openstef/tasks/create_solar_forecast.py RENAMED Viewed

@@ -12,7 +12,7 @@ Example:
         $ python create_solar_forecast
 """
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, UTC
 from pathlib import Path
 import numpy as np
@@ -23,7 +23,6 @@ from openstef import PROJECT_ROOT
 from openstef.tasks.utils.predictionjobloop import PredictionJobLoop
 from openstef.tasks.utils.taskcontext import TaskContext
-# TODO move to config
 PV_COEFS_FILEPATH = PROJECT_ROOT / "openstef" / "data" / "pv_single_coefs.csv"
@@ -231,7 +230,7 @@ def main(config=None, database=None, **kwargs):
         num_prediction_jobs = len(prediction_jobs)
         # only make customer = Provincie once an hour
-        utc_now_minute = datetime.utcnow().minute
+        utc_now_minute = datetime.now(tz=UTC)().minute
         if utc_now_minute >= 15:
             prediction_jobs = [
                 pj for pj in prediction_jobs if str(pj["name"]).startswith("Provincie")

{openstef-3.4.60 → openstef-3.4.62}/openstef/tasks/optimize_hyperparameters.py RENAMED Viewed

@@ -16,7 +16,7 @@ Example:
         $ python optimize_hyperparameters.py
 """
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, UTC
 from pathlib import Path
 from openstef.data_classes.prediction_job import PredictionJobDataClass
@@ -88,8 +88,8 @@ def optimize_hyperparameters_task(
         )
         return
-    datetime_start = datetime.utcnow() - timedelta(days=DEFAULT_TRAINING_PERIOD_DAYS)
-    datetime_end = datetime.utcnow()
+    datetime_start = datetime.now(tz=UTC) - timedelta(days=DEFAULT_TRAINING_PERIOD_DAYS)
+    datetime_end = datetime.now(tz=UTC)
     input_data = context.database.get_model_input(
         pid=pj["id"],

{openstef-3.4.60 → openstef-3.4.62}/openstef/tasks/split_forecast.py RENAMED Viewed

@@ -23,7 +23,7 @@ Example:
 """
 import logging
-from datetime import datetime
+from datetime import datetime, UTC
 from pathlib import Path
 import numpy as np
@@ -93,7 +93,6 @@ def split_forecast_task(
     components, coefdict = find_components(input_split_function)
     # Calculate mean absolute error (MAE)
-    # TODO: use a standard metric function for this
     error = components[["load", "Inschatting"]].diff(axis=1).iloc[:, 1]
     mae = error.abs().mean()
     coefdict.update({"MAE": mae})
@@ -183,7 +182,7 @@ def convert_coefdict_to_coefsdf(
         pj["id"],
         input_split_function.index.min().date(),
         input_split_function.index.max().date(),
-        datetime.utcnow(),
+        datetime.now(tz=UTC),
     ]
     coefsdf = pd.DataFrame(
         {"coef_name": list(coefdict.keys()), "coef_value": list(coefdict.values())}
@@ -237,7 +236,7 @@ def find_components(
     # Carry out fitting
     # See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html # noqa
-    coefs, cov = scipy.optimize.curve_fit(
+    coefs, _ = scipy.optimize.curve_fit(
         weighted_sum,
         xdata=df.iloc[:, 1:].values.T,
         ydata=load.values,

{openstef-3.4.60 → openstef-3.4.62}/openstef/tasks/train_model.py RENAMED Viewed

@@ -19,7 +19,7 @@ Example:
         $ python model_train.py
 """
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, UTC
 from pathlib import Path
 import pandas as pd
@@ -123,7 +123,7 @@ def train_model_task(
     )
     if datetime_end is None:
-        datetime_end = datetime.utcnow()
+        datetime_end = datetime.now(tz=UTC)
     if datetime_start is None:
         datetime_start = datetime_end - timedelta(days=training_period_days_to_fetch)
@@ -184,9 +184,9 @@ def train_model_task(
                     "'save_train_forecasts option was activated.'"
                 )
             context.database.write_train_forecasts(pj, data_sets)
-            context.logger.debug(f"Saved Forecasts from trained model on datasets")
+            context.logger.debug("Saved Forecasts from trained model on datasets")
     except SkipSaveTrainingForecasts:
-        context.logger.debug(f"Skip saving forecasts")
+        context.logger.debug("Skip saving forecasts")
     except InputDataOngoingZeroFlatlinerError:
         if (
             context.config.known_zero_flatliners
@@ -213,7 +213,7 @@ def main(model_type=None, config=None, database=None):
         model_type = [ml.value for ml in ModelType]
     taskname = Path(__file__).name.replace(".py", "")
-    datetime_now = datetime.utcnow()
+    datetime_now = datetime.now(tz=UTC)
     with TaskContext(taskname, config, database) as context:
         PredictionJobLoop(context, model_type=model_type).map(
             train_model_task, context, datetime_end=datetime_now

{openstef-3.4.60 → openstef-3.4.62}/openstef/validation/validation.py RENAMED Viewed

@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: MPL-2.0
 import logging
 import math
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, UTC
 from typing import Union
 import numpy as np
@@ -203,7 +203,7 @@ def calc_completeness_features(
         df_copy.drop("horizon", inplace=True, axis=1)
     if weights is None:
-        weights = np.array([1] * ((len(df_copy.columns))))
+        weights = np.array([1] * (len(df_copy.columns)))
     length_weights = len(weights)
     length_features = len(df_copy.columns)
@@ -243,7 +243,7 @@ def detect_ongoing_zero_flatliner(
     """
     # remove all timestamps in the future
-    load = load[load.index.tz_localize(None) <= datetime.utcnow()]
+    load = load[load.index <= datetime.now(tz=UTC)]
     latest_measurement_time = load.dropna().index.max()
     latest_measurements = load[
         latest_measurement_time - timedelta(minutes=duration_threshold_minutes) :
@@ -297,9 +297,10 @@ def calc_completeness_dataframe(
         # timecols: {delay:number of points expected to be missing}
         # number of points expected to be missing = numberOfPointsUpToTwoDaysAhead - numberOfPointsAvailable
         timecols = {
-            x: len(df) - eval(x[2:].replace("min", "/60").replace("d", "*24.0")) / 0.25
-            for x in df.columns
-            if x[:2] == "T-"
+            column: len(df)
+            - eval(column[2:].replace("min", "/60").replace("d", "*24.0")) / 0.25
+            for column in df.columns
+            if column.startswith("T-")
         }
         non_na_count = df.count()

{openstef-3.4.60 → openstef-3.4.62}/openstef.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: openstef
-Version: 3.4.60
+Version: 3.4.62
 Summary: Open short term energy forecaster
 Home-page: https://github.com/OpenSTEF/openstef
 Author: Alliander N.V

{openstef-3.4.60 → openstef-3.4.62}/setup.py RENAMED Viewed

@@ -29,7 +29,7 @@ def read_long_description_from_readme():
 setup(
     name="openstef",
-    version="3.4.60",
+    version="3.4.62",
     packages=find_packages(include=["openstef", "openstef.*"]),
     description="Open short term energy forecaster",
     long_description=read_long_description_from_readme(),

openstef-3.4.60/openstef/data_classes/prediction_job.py DELETED Viewed

@@ -1,135 +0,0 @@
-# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
-#
-# SPDX-License-Identifier: MPL-2.0
-"""Specifies the prediction job dataclass."""
-from typing import Optional, Union
-from pydantic.v1 import BaseModel
-from openstef.data_classes.data_prep import DataPrepDataClass
-from openstef.data_classes.model_specifications import ModelSpecificationDataClass
-from openstef.data_classes.split_function import SplitFuncDataClass
-from openstef.enums import PipelineType, BiddingZone, AggregateFunction
-class PredictionJobDataClass(BaseModel):
-    """Holds all information about the specific forecast that has to be made."""
-    id: Union[int, str]
-    """The prediction job id (often abbreviated as pid)."""
-    model: str
-    """The model type that should be used.
-    Options are:
-        - ``"xgb"``
-        - ``"xgb_quantile"``
-        - ``"lgb"``
-        - ``"linear"``
-        - ``"linear_quantile"``
-        - ``"gblinear_quantile"``
-        - ``"xgb_multioutput_quantile"``
-        - ``"flatliner"``
-    If unsure what to pick, choose ``"xgb"``.
-    """
-    model_kwargs: Optional[dict]
-    """The model parameters that should be used."""
-    forecast_type: str
-    """The type of forecasts that should be made.
-    Options are:
-        - ``"demand"``
-        - ``"wind"``
-        - ``"basecase"``
-    If unsure what to pick, choose ``"demand"``.
-    """
-    horizon_minutes: Optional[int] = 2880
-    """The horizon of the desired forecast in minutes used in tasks. Defaults to 2880 minutes (i.e. 2 days)."""
-    resolution_minutes: int
-    """The resolution of the desired forecast in minutes."""
-    lat: Optional[float] = 52.132633
-    """Latitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derived features and component splitting."""
-    lon: Optional[float] = 5.291266
-    """Longitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derived features and component splitting."""
-    name: str
-    """Name of the forecast, e.g. the location name."""
-    electricity_bidding_zone: Optional[BiddingZone] = BiddingZone.NL
-    """Bidding zone is used to determine the electricity price. It is also used to determine the holidays that should be used. Currently only ENTSO-E bidding zones are supported."""
-    train_components: Optional[bool]
-    """Whether splitting the forecasts in wind, solar, rest is desired."""
-    description: Optional[str]
-    """Optional description of the prediction job for human reference."""
-    quantiles: Optional[list[float]]
-    """Quantiles that have to be forecasted."""
-    train_split_func: Optional[SplitFuncDataClass]
-    """Optional custom splitting function for the operational process."""
-    backtest_split_func: Optional[SplitFuncDataClass]
-    """Optional custom splitting function for backtesting."""
-    train_horizons_minutes: Optional[list[int]]
-    """List of horizons that should be taken into account during training."""
-    default_modelspecs: Optional[ModelSpecificationDataClass]
-    """Default model specifications"""
-    save_train_forecasts: bool = False
-    """Indicates whether the forecasts produced during the training process should be saved."""
-    completeness_threshold: float = 0.5
-    """Minimum fraction of data that should be available for making a regular forecast."""
-    minimal_table_length: int = 100
-    """Minimum length (in rows) of the forecast input for making a regular forecast."""
-    flatliner_threshold_minutes: int = 1440
-    """Number of minutes that the load has to be constant to detect a flatliner. """
-    data_balancing_ratio: Optional[float] = None
-    """If data balancing is enabled, the data will be balanced with data from 1 year
-    ago in the future."""
-    rolling_aggregate_features: Optional[list[AggregateFunction]] = None
-    """If not None, rolling aggregate(s) of load will be used as features in the model."""
-    depends_on: Optional[list[Union[int, str]]]
-    """Link to another prediction job on which this prediction job might depend."""
-    sid: Optional[str]
-    """Only required for create_solar_forecast task"""
-    turbine_type: Optional[str]
-    """Only required for create_wind_forecast task"""
-    n_turbines: Optional[float]
-    """Only required for create_wind_forecast task"""
-    hub_height: Optional[float]
-    """Only required for create_wind_forecast task"""
-    pipelines_to_run: list[PipelineType] = [
-        PipelineType.TRAIN,
-        PipelineType.HYPER_PARMATERS,
-        PipelineType.FORECAST,
-    ]
-    """The pipelines to run for this pj"""
-    alternative_forecast_model_pid: Optional[Union[int, str]]
-    """The pid that references another prediction job from which the model should be used for making forecasts."""
-    data_prep_class: Optional[DataPrepDataClass]
-    """The import string for the custom data prep class"""
-    class Config:
-        """Pydantic model configuration.
-        The following configuration is needed to prevent ids in "depends_on" from being converted from int to str when we
-        use integer ids.
-        """
-        smart_union = True
-    def __getitem__(self, item: str) -> any:
-        """Allows us to use subscription to get the items from the object."""
-        return getattr(self, item)
-    def __setitem__(self, key: str, value: any) -> None:
-        """Allows us to use subscription to set the items in the object."""
-        if hasattr(self, key):
-            self.__dict__[key] = value
-        else:
-            raise AttributeError(f"{key} not an attribute of prediction job.")
-    def get(self, key: str, default: any = None) -> any:
-        """Allows to use the get functions similar to a python dict."""
-        if hasattr(self, key):
-            return getattr(self, key)
-        else:
-            return default