PyPI - openstef - Versions diffs - 3.4.77__py3-none-any.whl → 3.4.79__py3-none-any.whl - Mend

openstef 3.4.77__py3-none-any.whl → 3.4.79__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

openstef/data_classes/prediction_job.py CHANGED Viewed

@@ -140,6 +140,10 @@ class PredictionJobDataClass(BaseModel):
     data_prep_class: Optional[DataPrepDataClass] = Field(
         None, description="The import string for the custom data prep class"
     )
+    model_run_id: Optional[str] = Field(
+        None,
+        description="The specific model run number that should be used for the forecast. If not set, the latest model run will be used.",
+    )
     fallback_strategy: Optional[FallbackStrategy] = Field(
         FallbackStrategy.EXTREME_DAY,

openstef/feature_engineering/holiday_features.py CHANGED Viewed

@@ -1,8 +1,8 @@
 # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
 #
 # SPDX-License-Identifier: MPL-2.0
-"""This module contains all holiday related features."""
 from datetime import datetime, timedelta
+import collections
 import holidays
 import numpy as np
@@ -26,7 +26,6 @@ def generate_holiday_feature_functions(
         2022-12-24 - 2023-01-08 is the 'Kerstvakantie'
         2022-10-15 - 2022-10-23 is the 'HerfstvakantieNoord'
     The holidays are based on a manually generated csv file.
     The information is collected using:
     https://www.schoolvakanties-nederland.nl/ and the python holiday function
@@ -44,7 +43,6 @@ def generate_holiday_feature_functions(
         - Pinksteren
         - Kerst
     The 'Brugdagen' are updated untill dec 2020. (Generated using agenda)
     Args:
@@ -83,23 +81,34 @@ def generate_holiday_feature_functions(
             )
         }
     )
     # Define empty list to keep track of bridgedays
     bridge_days = []
-    # Loop over list of holidays names
+    # Group holiday dates by name
+    holiday_dates_by_name = collections.defaultdict(list)
     for date, holiday_name in sorted(country_holidays.items()):
-        # Define function explicitely to mitigate 'late binding' problem
-        def make_holiday_func(requested_date):
-            return lambda x: np.isin(x.index.date, np.array([requested_date]))
+        holiday_dates_by_name[holiday_name].append(date)
-        # Create lag function for each holiday
+    # Create one function per holiday name that checks all dates for that holiday
+    for holiday_name, dates in holiday_dates_by_name.items():
+        # Use a default argument to capture the dates at definition time
         holiday_functions.update(
-            {"is_" + holiday_name.replace(" ", "_").lower(): make_holiday_func(date)}
+            {
+                "is_"
+                + holiday_name.replace(
+                    " ", "_"
+                ).lower(): lambda x, dates_local=dates: np.isin(
+                    x.index.date, np.array(dates_local)
+                )
+            }
         )
-        # Check for bridge day
-        holiday_functions, bridge_days = check_for_bridge_day(
-            date, holiday_name, country_code, years, holiday_functions, bridge_days
-        )
+        # Check for bridge days for each date of this holiday
+        for date in dates:
+            holiday_functions, bridge_days = check_for_bridge_day(
+                date, holiday_name, country_code, years, holiday_functions, bridge_days
+            )
     # Add feature function that includes all bridgedays
     holiday_functions.update(
@@ -108,7 +117,7 @@ def generate_holiday_feature_functions(
     # Add school holidays if country is NL
     if country_code == "NL":
-        # Manully generated csv including all dutch schoolholidays for different regions
+        # Manually generated csv including all dutch schoolholidays for different regions
         df_holidays = pd.read_csv(path_to_school_holidays_csv, index_col=None)
         df_holidays["datum"] = pd.to_datetime(df_holidays.datum).apply(
             lambda x: x.date()
@@ -125,19 +134,17 @@ def generate_holiday_feature_functions(
         # Loop over list of holidays names
         for holiday_name in list(set(df_holidays.name)):
-            # Define function explicitely to mitigate 'late binding' problem
-            def make_holiday_func(holidayname=holiday_name):
-                return lambda x: np.isin(
-                    x.index.date,
-                    df_holidays.datum[df_holidays.name == holidayname].values,
-                )
-            # Create lag function for each holiday
+            # Use the holidayname as a default argument to capture it at definition time
             holiday_functions.update(
                 {
                     "is_"
-                    + holiday_name.replace(" ", "_").lower(): make_holiday_func(
-                        holidayname=holiday_name
+                    + holiday_name.replace(
+                        " ", "_"
+                    ).lower(): lambda x, holiday_name_local=holiday_name: np.isin(
+                        x.index.date,
+                        df_holidays.datum[
+                            df_holidays.name == holiday_name_local
+                        ].values,
                     )
                 }
             )
@@ -178,9 +185,10 @@ def check_for_bridge_day(
     if date in country_holidays:
         return holiday_functions, bridge_days
-    # Define function explicitely to mitigate 'late binding' problem
+    # Define function explicitly to mitigate 'late binding' problem
+    # Use a default argument to capture the date at definition time
     def make_holiday_func(requested_date):
-        return lambda x: np.isin(x.index.date, np.array([requested_date]))
+        return lambda x, dt=requested_date: np.isin(x.index.date, np.array([dt]))
     # Looking forward: If day after tomorow is a national holiday or
     # a saturday check if tomorow is not a national holiday

openstef/model/regressors/median.py CHANGED Viewed

@@ -304,9 +304,11 @@ class MedianRegressor(OpenstfRegressor, RegressorMixin):
         Which lag features are used is determined by the feature engineering step.
         """
-        feature_names, frequency, feature_to_lags_in_min = (
-            self._extract_and_validate_lags(x)
-        )
+        (
+            feature_names,
+            frequency,
+            feature_to_lags_in_min,
+        ) = self._extract_and_validate_lags(x)
         self.feature_names_ = list(feature_names)
         self.frequency_ = frequency

openstef/model/serializer.py CHANGED Viewed

@@ -18,6 +18,7 @@ from mlflow.store.artifact.artifact_repository_registry import get_artifact_repo
 from xgboost import XGBModel  # Temporary for backward compatibility
 from openstef.data_classes.model_specifications import ModelSpecificationDataClass
+from openstef.data_classes.prediction_job import PredictionJobDataClass
 from openstef.logging.logger_factory import get_logger
 from openstef.metrics.reporter import Report
 from openstef.model.regressors.regressor import OpenstfRegressor
@@ -143,20 +144,30 @@ class MLflowSerializer:
     def load_model(
         self,
         experiment_name: str,
+        model_run_id: Optional[str] = None,
     ) -> tuple[OpenstfRegressor, ModelSpecificationDataClass]:
-        """Load sklearn compatible model from MLFlow.
+        """ Load an sklearn-compatible model from MLflow.
+        This method retrieves a trained model and its specifications from MLflow
+        based on the provided PredictionJobDataClass instance. It supports loading
+        a specific model run if a run number is provided.
         Args:
-            experiment_name: Name of the experiment, often the id of the predition job.
+                experiment_name (str): Name of the experiment, often the id of the predition job.
+                model_run_id (Optional[str]): The specific model run number that should be used for the forecast.
+        Returns:
+            tuple[OpenstfRegressor, ModelSpecificationDataClass]: A tuple containing
+                the loaded model and its specifications.
-        Raises:
-            LookupError: If model is not found in MLflow.
+            LookupError: If the model is not found in MLflow or if an error occurs
+                during the loading process.
         """
         try:
             models_df = self._find_models(
-                self.experiment_name_prefix + experiment_name, max_results=1
-            )  # return the latest finished run of the model
+                self.experiment_name_prefix + experiment_name, max_results=1, model_run_id=model_run_id)
+             # return the latest finished run of the model
             if not models_df.empty:
                 latest_run = models_df.iloc[0]  # Use .iloc[0] to only get latest run
             else:
@@ -172,7 +183,7 @@ class MLflowSerializer:
             )  # Path without file:///
             self.logger.info("Model successfully loaded with MLflow")
             return loaded_model, model_specs
-        except (AttributeError, MlflowException, OSError) as exception:
+        except (AttributeError, MlflowException, OSError) as exception:
             raise LookupError("Model not found. First train a model!") from exception
     def get_model_age(
@@ -205,8 +216,13 @@ class MLflowSerializer:
         experiment_name: str,
         max_results: Optional[int] = 100,
         filter_string: str = "attribute.status = 'FINISHED'",
+        model_run_id: Optional[int] = None,
     ) -> pd.DataFrame:
         """Finds trained models for specific experiment_name sorted by age in descending order."""
+        if model_run_id is not None:
+            filter_string += f" AND attributes.run_id = '{model_run_id}'"
         models_df = mlflow.search_runs(
             experiment_names=[experiment_name],
             max_results=max_results,

openstef/pipeline/create_forecast.py CHANGED Viewed

@@ -52,11 +52,10 @@ def create_forecast_pipeline(
     # Use the alternative forecast model if it's specify in the pj
     if pj.alternative_forecast_model_pid:
         prediction_model_pid = pj.alternative_forecast_model_pid
     # Load most recent model for the given pid
     model, model_specs = MLflowSerializer(
         mlflow_tracking_uri=mlflow_tracking_uri
-    ).load_model(experiment_name=str(prediction_model_pid))
+    ).load_model(experiment_name=str(prediction_model_pid), model_run_id=pj.get("model_run_id"))
     return create_forecast_pipeline_core(pj, input_data, model, model_specs)

openstef/pipeline/train_model.py CHANGED Viewed

@@ -52,6 +52,7 @@ def train_model_pipeline(
         check_old_model_age: Check if training should be skipped because the model is too young
         mlflow_tracking_uri: Tracking URI for MLFlow
         artifact_folder: Path where artifacts, such as trained models, are stored
+        ignore_existing_models: If True, a new model is trained as if no old model exists.
     Returns:
         If pj.save_train_forecasts is False, None is returned
@@ -168,6 +169,7 @@ def train_model_pipeline_core(
         input_data: Input data
         old_model: Old model to compare to. Defaults to None.
         horizons: Horizons to train on in hours, relevant for feature engineering.
+        ignore_existing_models: If True, all existing models, including, hyperparameters are ignored and defsault values are used.
     Raises:
         InputDataInsufficientError: when input data is insufficient.
@@ -319,8 +321,9 @@ def train_pipeline_step_load_model(
     old_model: Optional[OpenstfRegressor]
     if not ignore_existing_models:
+        model_run_id = pj.get("model_run_id", None)
         try:
-            old_model, model_specs = serializer.load_model(experiment_name=str(pj.id))
+            old_model, model_specs = serializer.load_model(str(pj.id), model_run_id=model_run_id)
             old_model_age = old_model.age  # Age attribute is openstef specific
             return old_model, model_specs, old_model_age
         except (AttributeError, FileNotFoundError, LookupError):

{openstef-3.4.77.dist-info → openstef-3.4.79.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openstef
-Version: 3.4.77
+Version: 3.4.79
 Summary: Open short term energy forecaster
 Home-page: https://github.com/OpenSTEF/openstef
 Author: Alliander N.V

{openstef-3.4.77.dist-info → openstef-3.4.79.dist-info}/RECORD RENAMED Viewed

@@ -17,7 +17,7 @@ openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license,sha25
 openstef/data_classes/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/data_classes/data_prep.py,sha256=sANgFjfwmSWhLCfmLjfqXQnczuvVZfk2765jZd7LwuE,3691
 openstef/data_classes/model_specifications.py,sha256=PZeBLfH_MrP9-QorL1r0Hklp0befE8Nw05vNhTX9Y20,1338
-openstef/data_classes/prediction_job.py,sha256=HS2ZjhOoF4EdQRttOiTM0W1E7z5ZNjBglMpcSSZvoCY,6967
+openstef/data_classes/prediction_job.py,sha256=794joix2ynvCYvm-MbiA5eagT46CArr_n_K5UrVoFBs,7166
 openstef/data_classes/split_function.py,sha256=K8y1dsQC5exeIDh37f7UwJ11tV71_uVSNbnKmwXpnOM,3435
 openstef/feature_engineering/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/feature_engineering/apply_features.py,sha256=pro4eUmOFexX_9g9kJtDcbrQ1hWKzXjVpiJBmmBi89o,5326
@@ -27,7 +27,7 @@ openstef/feature_engineering/data_preparation.py,sha256=TXAPTtSmBRC_LZP7o5Jlmj7J
 openstef/feature_engineering/feature_adder.py,sha256=aSqDl_gUrB3H2TD3cNvU5JniY_KOb4u4a2A6J7zB2BQ,6835
 openstef/feature_engineering/feature_applicator.py,sha256=bU1Pu5V1fxMCQCwh6HG66nmctBjrNa7gHUYqOqPmLTU,7501
 openstef/feature_engineering/general.py,sha256=PdvnDqkze31FggUuWHQ1ysroh_uDOa1hZ7NftMYH2_U,4130
-openstef/feature_engineering/holiday_features.py,sha256=CbolIP5bfiQkqDct-9TbD828-lhC48bfeNQ2-VFnsJA,8274
+openstef/feature_engineering/holiday_features.py,sha256=g3VBj9oU3wmp82iKcknX41S_7Z4tGIjlvgbZOcFqQaw,8572
 openstef/feature_engineering/lag_features.py,sha256=Dr6qS8UhdgEHPZZSe-w6ibtjl_lcbcQohhqdZN9fqEU,5652
 openstef/feature_engineering/missing_values_transformer.py,sha256=U8pdA61k8CRosO3yR2IsCy5C4Ka3c8BWCimDLIB4LCQ,5010
 openstef/feature_engineering/rolling_features.py,sha256=V-UulqWKuSksFQAASyVSQim1stEA4TmtHNULCrrdgjo,2160
@@ -49,7 +49,7 @@ openstef/model/fallback.py,sha256=x60GVyl1c5DpebzkjJEMToZpMTD1c4FrhM-tBN9uizk,31
 openstef/model/model_creator.py,sha256=fnhcVGUHskbuAys5kjlJ4GXKxbi9Eq5eAA19ex11Vv0,6658
 openstef/model/objective.py,sha256=0PZUbPzuyaYlpWEH_qPavO6ll7zwqTTUTfIrUzzFMbs,15585
 openstef/model/objective_creator.py,sha256=3jJgcmY1sm-Yoe3SfjKrJukrsqtYyloUFaPbBWqswhQ,2208
-openstef/model/serializer.py,sha256=k5GY8eRJdlii8mEY7Qheu4yb5USyIyxw77EYkSQJGYk,17034
+openstef/model/serializer.py,sha256=8vESYq2TmtEzEViBR7qbJ3rjm68LZkbiET2cUPGvFMs,17925
 openstef/model/standard_deviation_generator.py,sha256=OorRvX2wRScU7f4SIBoiT24yJeeM50sETP3xC6m5IG4,2865
 openstef/model/metamodels/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/model/metamodels/feature_clipper.py,sha256=DNsyYdjUT7ZNimJJIyTvv1nmwTwDUk5fX9EDgV9FbUQ,2862
@@ -64,7 +64,7 @@ openstef/model/regressors/gblinear_quantile.py,sha256=PKQL_TAXa3Kw9oZrKC6Uvo_n2N
 openstef/model/regressors/lgbm.py,sha256=zCdn1euEdSFxYJzH8XqQFFnb6R4JVUnmineKjX_Gy-g,800
 openstef/model/regressors/linear.py,sha256=uOvZMLGZH_9nXfmS5honCMfyVeyGXP1Cza9A_BdXlVw,3665
 openstef/model/regressors/linear_quantile.py,sha256=zIpGo9deMeTZdwFWoZ3FstX74mYdlAhfg-YOsPRFl0k,10534
-openstef/model/regressors/median.py,sha256=i6nqSsKHnMxA06Ea6SNWIn4f8lvAaMz58Smx3bZ731E,14132
+openstef/model/regressors/median.py,sha256=f_yZWuJXAUbGbHAIMqpIAFSaUi0GnEe55DgFWGo7S5U,14157
 openstef/model/regressors/regressor.py,sha256=0um575rTEkzYb1E5IAOuTlsZDhmb7eI5byu5e062NRs,3469
 openstef/model/regressors/xgb.py,sha256=uhV9Wm90aOkjByTm-O2xpt2kpANRxAqQvv5mA0H1uBc,1294
 openstef/model/regressors/xgb_multioutput_quantile.py,sha256=xWzA7tymC_o-F1OS3I7vUKf9zP6RR1ZglEeY4NAgjU0,9146
@@ -77,10 +77,10 @@ openstef/monitoring/teams.py,sha256=klN7Ge-0VktJbZ_I-K8MJIc3LWgdNy0MGL8b2TdoUR8,
 openstef/pipeline/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/pipeline/create_basecase_forecast.py,sha256=7IShIjEmjkzpNzWzQVKmYQvy0q_uwCGO-E0mSRmGdhw,4397
 openstef/pipeline/create_component_forecast.py,sha256=40fYKajdj4F9K7fzmL3euyvwTr0v-oO_5cXpya87A0c,5839
-openstef/pipeline/create_forecast.py,sha256=rLGU7DXqAQNH_pkqIF8tvjOq0NldnKTKH2sylLrNiRg,5640
+openstef/pipeline/create_forecast.py,sha256=z18MrnMW6f85mLjH9XKLniuCQ9oziWCqfgA5YdEgROM,5676
 openstef/pipeline/optimize_hyperparameters.py,sha256=w5LpZhW3KVklCJzaogNzyHfpMJfNqeRAnvyV4vi35wg,10953
 openstef/pipeline/train_create_forecast_backtest.py,sha256=hBJPxfDkbrmFSSGZrRH1vTiIVqJP-SWe0ibVpHT_8Qg,6048
-openstef/pipeline/train_model.py,sha256=O1pyATMQUkNZQ01FlOwG8r3gtKwRcx7YD73f-91umuo,19948
+openstef/pipeline/train_model.py,sha256=4mtNXosLxxLNDtyIBd58youAHx5zWIW7PoSeZdtDoXY,20234
 openstef/pipeline/utils.py,sha256=23mB31p19FoGWelLJzxNmqlzGwEr3fCDBEA37V2kpYY,2167
 openstef/plotting/__init__.py,sha256=KQjXzyafCt1bE7XDrSeV4TDUIO7MkwN_Br4ASOcNI2g,163
 openstef/plotting/load_forecast_plotter.py,sha256=GWHVmUB2YosNj7TnSrMnxYAfM2Z1mNg5oRV9A_lJmQY,8129
@@ -104,8 +104,8 @@ openstef/tasks/utils/predictionjobloop.py,sha256=Ysy3zF5lzPMz_asYDKeF5m0qgVT3tCt
 openstef/tasks/utils/taskcontext.py,sha256=O-LZ_wHEl5vbT8oB7EYtOeMkvk6EqCnI1-KiyER7Eu4,5407
 openstef/validation/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
 openstef/validation/validation.py,sha256=r6UqkdH5TMjsGfn8Ta07K1jkqmrVmwcPGfyQvMmZyO4,11459
-openstef-3.4.77.dist-info/licenses/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
-openstef-3.4.77.dist-info/METADATA,sha256=d8ogfqGlSvJIz8uiS3H2n66frGodeTDiGS4dAhjkve0,8834
-openstef-3.4.77.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-openstef-3.4.77.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
-openstef-3.4.77.dist-info/RECORD,,
+openstef-3.4.79.dist-info/licenses/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
+openstef-3.4.79.dist-info/METADATA,sha256=zfFVPR_RhCyKZ50LSCxuA46CI8L8d2tIJH02ryc9bUk,8834
+openstef-3.4.79.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+openstef-3.4.79.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
+openstef-3.4.79.dist-info/RECORD,,

{openstef-3.4.77.dist-info → openstef-3.4.79.dist-info}/WHEEL RENAMED Viewed

File without changes

{openstef-3.4.77.dist-info → openstef-3.4.79.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{openstef-3.4.77.dist-info → openstef-3.4.79.dist-info}/top_level.txt RENAMED Viewed

File without changes

openstef 3.4.77__py3-none-any.whl → 3.4.79__py3-none-any.whl

openstef 3.4.77__py3-none-any.whl → 3.4.79__py3-none-any.whl