openstef 3.4.56__py3-none-any.whl → 4.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. openstef-4.0.0a3.dist-info/METADATA +177 -0
  2. openstef-4.0.0a3.dist-info/RECORD +4 -0
  3. {openstef-3.4.56.dist-info → openstef-4.0.0a3.dist-info}/WHEEL +1 -2
  4. openstef/__init__.py +0 -14
  5. openstef/__main__.py +0 -3
  6. openstef/app_settings.py +0 -19
  7. openstef/data/NL_terrestrial_radiation.csv +0 -25585
  8. openstef/data/NL_terrestrial_radiation.csv.license +0 -3
  9. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z +0 -0
  10. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license +0 -3
  11. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md +0 -18
  12. openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license +0 -3
  13. openstef/data/dutch_holidays.csv +0 -1759
  14. openstef/data/dutch_holidays.csv.license +0 -3
  15. openstef/data/pv_single_coefs.csv +0 -601
  16. openstef/data/pv_single_coefs.csv.license +0 -3
  17. openstef/data_classes/__init__.py +0 -3
  18. openstef/data_classes/data_prep.py +0 -99
  19. openstef/data_classes/model_specifications.py +0 -30
  20. openstef/data_classes/prediction_job.py +0 -135
  21. openstef/data_classes/split_function.py +0 -97
  22. openstef/enums.py +0 -140
  23. openstef/exceptions.py +0 -74
  24. openstef/feature_engineering/__init__.py +0 -3
  25. openstef/feature_engineering/apply_features.py +0 -138
  26. openstef/feature_engineering/bidding_zone_to_country_mapping.py +0 -106
  27. openstef/feature_engineering/cyclic_features.py +0 -161
  28. openstef/feature_engineering/data_preparation.py +0 -152
  29. openstef/feature_engineering/feature_adder.py +0 -206
  30. openstef/feature_engineering/feature_applicator.py +0 -202
  31. openstef/feature_engineering/general.py +0 -141
  32. openstef/feature_engineering/holiday_features.py +0 -231
  33. openstef/feature_engineering/lag_features.py +0 -165
  34. openstef/feature_engineering/missing_values_transformer.py +0 -141
  35. openstef/feature_engineering/rolling_features.py +0 -58
  36. openstef/feature_engineering/weather_features.py +0 -492
  37. openstef/metrics/__init__.py +0 -3
  38. openstef/metrics/figure.py +0 -303
  39. openstef/metrics/metrics.py +0 -486
  40. openstef/metrics/reporter.py +0 -222
  41. openstef/model/__init__.py +0 -3
  42. openstef/model/basecase.py +0 -82
  43. openstef/model/confidence_interval_applicator.py +0 -242
  44. openstef/model/fallback.py +0 -77
  45. openstef/model/metamodels/__init__.py +0 -3
  46. openstef/model/metamodels/feature_clipper.py +0 -90
  47. openstef/model/metamodels/grouped_regressor.py +0 -222
  48. openstef/model/metamodels/missing_values_handler.py +0 -138
  49. openstef/model/model_creator.py +0 -214
  50. openstef/model/objective.py +0 -426
  51. openstef/model/objective_creator.py +0 -65
  52. openstef/model/regressors/__init__.py +0 -3
  53. openstef/model/regressors/arima.py +0 -197
  54. openstef/model/regressors/custom_regressor.py +0 -64
  55. openstef/model/regressors/dazls.py +0 -116
  56. openstef/model/regressors/flatliner.py +0 -95
  57. openstef/model/regressors/gblinear_quantile.py +0 -334
  58. openstef/model/regressors/lgbm.py +0 -29
  59. openstef/model/regressors/linear.py +0 -90
  60. openstef/model/regressors/linear_quantile.py +0 -305
  61. openstef/model/regressors/regressor.py +0 -114
  62. openstef/model/regressors/xgb.py +0 -52
  63. openstef/model/regressors/xgb_multioutput_quantile.py +0 -261
  64. openstef/model/regressors/xgb_quantile.py +0 -228
  65. openstef/model/serializer.py +0 -431
  66. openstef/model/standard_deviation_generator.py +0 -81
  67. openstef/model_selection/__init__.py +0 -3
  68. openstef/model_selection/model_selection.py +0 -311
  69. openstef/monitoring/__init__.py +0 -3
  70. openstef/monitoring/performance_meter.py +0 -92
  71. openstef/monitoring/teams.py +0 -203
  72. openstef/pipeline/__init__.py +0 -3
  73. openstef/pipeline/create_basecase_forecast.py +0 -133
  74. openstef/pipeline/create_component_forecast.py +0 -168
  75. openstef/pipeline/create_forecast.py +0 -171
  76. openstef/pipeline/optimize_hyperparameters.py +0 -317
  77. openstef/pipeline/train_create_forecast_backtest.py +0 -163
  78. openstef/pipeline/train_model.py +0 -561
  79. openstef/pipeline/utils.py +0 -52
  80. openstef/postprocessing/__init__.py +0 -3
  81. openstef/postprocessing/postprocessing.py +0 -275
  82. openstef/preprocessing/__init__.py +0 -3
  83. openstef/preprocessing/preprocessing.py +0 -42
  84. openstef/settings.py +0 -15
  85. openstef/tasks/__init__.py +0 -3
  86. openstef/tasks/calculate_kpi.py +0 -324
  87. openstef/tasks/create_basecase_forecast.py +0 -118
  88. openstef/tasks/create_components_forecast.py +0 -162
  89. openstef/tasks/create_forecast.py +0 -145
  90. openstef/tasks/create_solar_forecast.py +0 -420
  91. openstef/tasks/create_wind_forecast.py +0 -80
  92. openstef/tasks/optimize_hyperparameters.py +0 -135
  93. openstef/tasks/split_forecast.py +0 -273
  94. openstef/tasks/train_model.py +0 -224
  95. openstef/tasks/utils/__init__.py +0 -3
  96. openstef/tasks/utils/dependencies.py +0 -107
  97. openstef/tasks/utils/predictionjobloop.py +0 -243
  98. openstef/tasks/utils/taskcontext.py +0 -160
  99. openstef/validation/__init__.py +0 -3
  100. openstef/validation/validation.py +0 -322
  101. openstef-3.4.56.dist-info/METADATA +0 -154
  102. openstef-3.4.56.dist-info/RECORD +0 -102
  103. openstef-3.4.56.dist-info/top_level.txt +0 -1
  104. /openstef-3.4.56.dist-info/LICENSE → /openstef-4.0.0a3.dist-info/licenses/LICENSE.md +0 -0
@@ -1,82 +0,0 @@
1
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
- import numpy as np
5
- import pandas as pd
6
- from sklearn.base import BaseEstimator, RegressorMixin
7
-
8
- MINIMAL_RESOLUTION: int = 15 # Used for validating the forecast input
9
-
10
-
11
class BaseCaseModel(BaseEstimator, RegressorMixin):
    """Regressor that replays lagged load as the forecast.

    The forecast is taken from the ``T-7d`` feature and, for timestamps where
    that feature is missing, from the ``T-14d`` feature. Nothing is learned
    from data, so ``fit`` is a no-op.
    """

    def predict(self, forecast_input_data: pd.DataFrame) -> pd.DataFrame:
        """Predict using the basecase method.

        The basecase forecast is determined by the T-7d and T-14d load. This
        means fitting the model is not required. However a fit method is still
        included to be fully compatible with sklearn.

        Args:
            forecast_input_data: Forecast input dataframe

        Returns:
            Basecase forecast

        """
        return self.make_basecase_forecast(forecast_input_data)

    def fit(self, X=None, y=None):
        """Do nothing and return the estimator itself.

        ``X`` and ``y`` are accepted (and ignored) so the estimator can be
        called with the usual ``fit(X, y)`` convention; calling ``fit()``
        without arguments keeps working.

        Args:
            X: Ignored.
            y: Ignored.

        Returns:
            This estimator.

        """
        return self

    @staticmethod
    def make_basecase_forecast(
        forecast_input_data: pd.DataFrame, overwrite_delay_hours: int = 48
    ) -> pd.DataFrame:
        """Make a basecase forecast.

        The idea of the basecase forecast is that if all else fails, this
        forecast is still available. Basecase example: the load of last week.

        Args:
            forecast_input_data: Forecast input dataframe, must contain the
                columns "T-7d" and "T-14d".
            overwrite_delay_hours: Not used by this implementation; the
                parameter is kept so existing callers keep working.

        Raises:
            ValueError: if columns T-7d or T-14d is not present

        Returns:
            Basecase forecast: a DataFrame with a single "forecast" column,
            sorted by index.

        """
        # Check if required features are provided
        required_features = ["T-14d", "T-7d"]
        if not all(
            feature in forecast_input_data.columns for feature in required_features
        ):
            raise ValueError(
                "Could not make basecase, features T-7d and T-14d are required! Tip:"
                " Generate these features with a FeatureApplicator object."
            )

        # Primary source: the load of last week
        last_week = (
            forecast_input_data[["T-7d"]].dropna().rename(columns={"T-7d": "forecast"})
        )
        # Secondary source: the load of two weeks before, for timestamps where
        # last week's load is missing
        two_weeks_ago = (
            forecast_input_data[["T-14d"]]
            .dropna()
            .rename(columns={"T-14d": "forecast"})
        )

        # `duplicated()` marks every occurrence after the first, so the T-7d
        # value wins wherever both lags are available.
        basecase_forecast = pd.concat([last_week, two_weeks_ago])
        basecase_forecast = basecase_forecast[~basecase_forecast.index.duplicated()]

        return basecase_forecast.sort_index()

    @property
    def can_predict_quantiles(self):
        """Whether this model can predict quantiles (always False)."""
        return False
@@ -1,242 +0,0 @@
1
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
- import logging
5
- from datetime import datetime
6
-
7
- import numpy as np
8
- import pandas as pd
9
- import structlog
10
- from scipy import stats
11
- from sklearn.base import RegressorMixin
12
-
13
- from openstef.data_classes.prediction_job import PredictionJobDataClass
14
- from openstef.exceptions import ModelWithoutStDev
15
- from openstef.settings import Settings
16
-
17
-
18
class ConfidenceIntervalApplicator:
    """Add a "stdev" column and quantile columns to a forecast.

    The standard deviation is read from ``model.standard_deviation``; the
    quantiles come either from the model itself (when
    ``model.can_predict_quantiles`` is true) or from a normal-distribution
    assumption around the forecast.
    """

    def __init__(self, model: RegressorMixin, forecast_input_data: pd.DataFrame):
        """Store the model and its input data and set up a logger.

        Args:
            model: Model used for the forecast.
            forecast_input_data: Input data that is passed to ``model.predict``
                when quantiles are generated by quantile regression.

        """
        self.model = model
        self.forecast_input_data = forecast_input_data
        # NOTE: this reconfigures structlog globally on every instantiation.
        structlog.configure(
            wrapper_class=structlog.make_filtering_bound_logger(
                logging.getLevelName(Settings.log_level)
            )
        )
        self.logger = structlog.get_logger(self.__class__.__name__)

    def add_confidence_interval(
        self,
        forecast: pd.DataFrame,
        pj: PredictionJobDataClass,
    ) -> pd.DataFrame:
        """Add a confidence interval to a forecast.

        Adds a confidence interval to a forecast in two ways:
            1. "stdev" column, this is a column with a standard deviation that is
                determined during training (ConfidenceGenerator)
            2. Quantile columns, these columns give a more precise definition of the
                confidence interval. Quantile columns are determined with one of two
                methods, depending on the model type group:

                a. Default, using the "stdev" column and the assumption the error is
                    normally distributed.
                b. Quantile regression, this method is only available for quantile
                    models and uses specifically trained models to estimate the
                    quantiles of the confidence interval.

        Depending on the model type (quantile or non quantile),
        a confidence interval is added to the forecast based on quantile
        regression or the default method.

        Args:
            forecast: Forecast DataFrame with columns: "forecast"
            pj: Prediction job

        Returns:
            Forecast DataFrame with columns; "forecast", "stdev" and quantile columns.

        """
        temp_forecast = self._add_standard_deviation_to_forecast(forecast)

        if not self.model.can_predict_quantiles:
            return self._add_quantiles_to_forecast_default(
                temp_forecast, pj["quantiles"]
            )

        # Try to generate the quantiles that were requested
        try:
            return self._add_quantiles_to_forecast_quantile_regression(
                temp_forecast, pj["quantiles"]
            )
        except Exception:
            # Deliberately broad: the way a model rejects an unknown quantile
            # is model specific. Fall back on the quantiles of the model, this
            # can happen when the model was trained on different quantiles
            # than are requested. The warning is emitted before the fallback
            # so it is recorded even if the fallback itself fails.
            self.logger.warning(
                "Quantiles are requested the model was not trained on. Using the quantiles the model was trained on",
                requested_quantiles=pj["quantiles"],
                trained_quantiles=self.model.quantiles,
            )
            return self._add_quantiles_to_forecast_quantile_regression(
                temp_forecast, self.model.quantiles
            )

    def _add_standard_deviation_to_forecast(
        self, forecast: pd.DataFrame
    ) -> pd.DataFrame:
        """Add a standard deviation to a live forecast.

        The stdev for intermediate forecast horizons is interpolated.

        Args:
            forecast: Forecast DataFrame with columns: "forecast"

        Returns:
            Forecast with added standard deviation. DataFrame with columns:
                "forecast", "stdev" (and "tAhead", which is added when absent).
                The input DataFrame is not modified.

        Raises:
            ModelWithoutStDev: If the model does not have a valid standard deviation.

        """
        minimal_resolution: int = 15  # Minimal time resolution in minutes
        standard_deviation = self.model.standard_deviation

        # raise an exception if no valid standard deviation is available
        # (`or` short-circuits, so `.empty` is never evaluated on None)
        if standard_deviation is None or standard_deviation.empty:
            raise ModelWithoutStDev("No stdev available")

        if standard_deviation.stdev.isnull().values.all():
            raise ModelWithoutStDev("All stdev values are NA")

        # Fill stdev nans with the mean of all stdev values
        if standard_deviation.stdev.isnull().values.any():
            self.logger.warning(
                "Stdev for some hours is not known, filling in with mean."
            )
            # `assign` returns a new frame, so the standard deviation stored
            # on the model is left untouched.
            standard_deviation = standard_deviation.assign(
                stdev=standard_deviation.stdev.fillna(standard_deviation.stdev.mean())
            )

        # pivot to a dataframe indexed by hour with one stdev column per horizon
        stdev = standard_deviation.pivot_table(columns=["horizon"], index="hour")[
            "stdev"
        ]
        # The smallest and largest horizon act as the 'near' and 'far' horizon
        near = stdev.columns.min()
        far = stdev.columns.max()

        forecast_copy = forecast.copy()
        # add time ahead column if not already present
        if "tAhead" not in forecast_copy.columns:
            # Determine now, rounded on 15 minutes,
            # Rounding helps to prevent fractional t_aheads
            now = pd.Timestamp.now(tz="UTC").round(f"{minimal_resolution}min")
            # Express the same instant in the timezone of the index. A naive
            # index is compared against naive UTC wall-clock time.
            index_tz = forecast_copy.index.tz
            if index_tz is None:
                now = now.tz_localize(None)
            else:
                now = now.tz_convert(index_tz)
            # Determine t_aheads (in hours) by subtracting with now
            forecast_copy["tAhead"] = (
                forecast_copy.index - now
            ).total_seconds() / 3600.0

        # add helper column hour
        forecast_copy["hour"] = forecast_copy.index.hour

        # Define functions which can be used to approximate the error for in-between
        # time horizons
        # Let's fit an exponential decay of accuracy
        def calc_exp_dec(t, stdev_row, near, far):
            # We use the formula sigma(t) = A * (1 - exp(-t/tau)) + b
            # Strictly speaking, tau is specific for each time series.
            # However, for simplicity, we use tau = Far/4.
            tau = far / 4.0
            # Filling in the known sigma(Near) and sigma(Far) gives:
            sf, sn = stdev_row[far], stdev_row[near]
            A = (sf - sn) / ((1 - np.exp(-far / tau)) - (1 - np.exp(-near / tau)))
            b = sn - A * (1 - np.exp(-near / tau))
            value = A * (1 - np.exp(-t / tau)) + b
            # cap the value to keep it between near and far
            if value < sn:
                return sn
            return sf if value > sf else value

        # If only one horizon is available use that one
        if len(stdev.columns) == 1:
            forecast_copy["stdev"] = forecast_copy.apply(
                lambda x: stdev.loc[x.hour], axis=1
            )
        # If more are available do something fancy with interpolation
        else:
            # Add stdev to forecast_copy dataframe
            forecast_copy["stdev"] = forecast_copy.apply(
                lambda x: calc_exp_dec(x.tAhead, stdev.loc[x.hour], near, far), axis=1
            )
        return forecast_copy.drop(columns=["hour"])

    @staticmethod
    def _add_quantiles_to_forecast_default(
        forecast: pd.DataFrame, quantiles: list[float]
    ) -> pd.DataFrame:
        """Add quantiles to forecast.

        Use the standard deviation to calculate the quantiles, assuming a
        normal distribution around the forecast.

        Args:
            forecast: Forecast (should contain a 'forecast' + 'stdev' column).
                The quantile columns are added to this DataFrame in place.
            quantiles: List with desired quantiles,
                for example: [0.01, 0.1, 0.9, 0.99]

        Returns:
            Forecast DataFrame with quantile (e.g. 'quantile_PXX')
                columns added.

        Raises:
            ValueError: If the 'forecast' or 'stdev' column is missing.

        """
        # Check if stdev and forecast are in the dataframe
        if not all(elem in forecast.columns for elem in ["forecast", "stdev"]):
            raise ValueError("Forecast should contain a 'forecast' and 'stdev' column")

        for quantile in quantiles:
            quantile_key = f"quantile_P{quantile * 100:02.0f}"
            forecast[quantile_key] = (
                forecast["forecast"] + stats.norm.ppf(quantile) * forecast["stdev"]
            )

        return forecast

    def _add_quantiles_to_forecast_quantile_regression(
        self, forecast: pd.DataFrame, quantiles: list[float]
    ) -> pd.DataFrame:
        """Add quantiles to forecast.

        Use trained quantile regression model to calculate the quantiles.

        Args:
            forecast: Forecast
            quantiles: List with desired quantiles

        Returns:
            Forecast DataFrame with quantile (e.g. 'quantile_PXX')
                columns added.

        """
        # Predict every quantile on the forecast input data
        quantile_df = pd.DataFrame(index=self.forecast_input_data.index)
        for quantile in quantiles:
            quantile_key = f"quantile_P{quantile * 100:02.0f}"
            quantile_df[quantile_key] = self.model.predict(
                self.forecast_input_data, quantile=quantile
            )

        # Left merge: only keep quantiles for datetimes in forecast
        return forecast.merge(
            quantile_df, left_index=True, right_index=True, how="left"
        )
@@ -1,77 +0,0 @@
1
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
- from datetime import datetime
5
-
6
- import pandas as pd
7
-
8
-
9
def generate_fallback(
    forecast_input: pd.DataFrame,
    load: pd.DataFrame,
    fallback_strategy: str = "extreme_day",
) -> pd.DataFrame:
    """Make a fall back forecast, Set the value of the forecast 'quality' column to 'substituted'.

    Currently only fallback_strategy=extreme day is implemented which return historic profile of most extreme day.

    Args:
        forecast_input : dataframe desired for the forecast; only its
            datetime index is used.
        load: index=datetime, columns=['load']
        fallback_strategy: strategy to determine fallback. options:
            - extreme_day: use daily profile of most extreme day
    Returns:
        Fallback forecast DataFrame with columns; 'forecast', 'quality'.
        It has exactly one row per row of ``forecast_input``, in the same
        order, and its index is named "index". Moments whose time-of-day is
        not present in the profile of the most extreme day get a NaN forecast.

    Raises:
        ValueError if no (historic) load is available
        NotImplementedError if fallback_strategy != 'extreme_day'

    """
    # Check if load is completely empty
    if len(load.dropna()) == 0:
        raise ValueError("No historic load data available")

    if fallback_strategy != "extreme_day":
        raise NotImplementedError(
            f'fallback_strategy should be "extreme_day", received:{fallback_strategy}'
        )

    # Find most extreme historic day (do not count today as it is incomplete).
    # "Today" is taken in the timezone of the load index so it is compared
    # against wall-clock dates of the same timezone; a naive index is
    # compared against the UTC date.
    index_tz = load.index.tz
    today = pd.Timestamp.now(tz="UTC" if index_tz is None else index_tz).date()
    is_historic = load.index.tz_localize(None).date != today
    historic_load = load.loc[is_historic, "load"].dropna()
    if historic_load.empty:
        # Only (incomplete) data of today is available
        raise ValueError("No historic load data available")
    day_with_highest_load_date = historic_load.idxmax().date()

    # generate datetime range of the day with the highest load
    from_datetime = pd.Timestamp(day_with_highest_load_date, tz=index_tz)
    till_datetime = from_datetime + pd.Timedelta("1 days")

    # slice load dataframe, only rows for the day with the highest load, and
    # add the time-of-day as merge key. `assign` creates a new frame, so no
    # value is written into a slice of `load`.
    in_extreme_day = (load.index >= from_datetime) & (load.index < till_datetime)
    highest_daily_loadprofile = load.loc[in_extreme_day, ["load"]].assign(
        time=load.index[in_extreme_day].time
    )

    # Match moments by time-of-day. A left merge keeps exactly the requested
    # moments in their original order. The datetime column is named
    # explicitly so a named input index is handled as well.
    requested_moments = pd.DataFrame(
        {"index": forecast_input.index, "time": forecast_input.index.time}
    )
    forecast = requested_moments.merge(
        highest_daily_loadprofile, on="time", how="left"
    ).set_index("index")

    # Rename so column is called forecast
    forecast = forecast[["load"]].rename(columns={"load": "forecast"})

    # Add a column quality.
    forecast["quality"] = "substituted"

    return forecast
@@ -1,3 +0,0 @@
1
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
@@ -1,90 +0,0 @@
1
- # SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
2
- #
3
- # SPDX-License-Identifier: MPL-2.0
4
- from sklearn.base import BaseEstimator, TransformerMixin
5
- import pandas as pd
6
- from typing import List, Dict, Tuple, Optional
7
-
8
-
9
class FeatureClipper(BaseEstimator, TransformerMixin):
    """
    A transformer that clips the values of specified columns to the minimum and
    maximum values observed during training. This prevents the model from
    extrapolating beyond these values during prediction.
    """

    def __init__(self, columns: List[str]):
        """
        Initialize the FeatureClipper.

        Parameters:
        ----------
        columns : List[str]
            List of column names to be clipped.
        """
        self.columns: List[str] = columns
        # Maps column name -> (min, max) observed by the most recent `fit`.
        self.feature_ranges: Dict[str, Tuple[float, float]] = {}

    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None) -> "FeatureClipper":
        """
        Fits the transformer on the training data by calculating the min and max
        values for the specified columns.

        Ranges stored by an earlier call to ``fit`` are discarded first, so the
        transformer only reflects the data of the most recent fit. Columns
        listed in ``columns`` that are absent from ``X`` are skipped.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame containing training data.

        y : Optional[pd.Series]
            Ignored. This parameter exists for compatibility with scikit-learn's pipeline.

        Returns:
        -------
        self : FeatureClipper
            Fitted transformer.

        Raises:
        ------
        ValueError:
            If the input is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")

        # Build a fresh mapping so a refit never keeps a stale range for a
        # column that is missing from the new training data.
        self.feature_ranges = {
            col: (X[col].min(), X[col].max())
            for col in self.columns
            if col in X.columns
        }

        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """
        Transforms new data by clipping the specified columns' values to be within
        the min and max range observed during fitting.

        Columns without a fitted range, or absent from ``X``, are left as is.

        Parameters:
        ----------
        X : pd.DataFrame
            The input DataFrame containing new data to be transformed.

        Returns:
        -------
        X_ : pd.DataFrame
            A copy of the input DataFrame with clipped values in the specified columns.

        Raises:
        ------
        ValueError:
            If the input is not a pandas DataFrame.
        """
        if not isinstance(X, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame")

        X_copy = X.copy()

        for col in self.columns:
            if col in X_copy.columns and col in self.feature_ranges:
                min_val, max_val = self.feature_ranges[col]
                X_copy[col] = X_copy[col].clip(lower=min_val, upper=max_val)

        return X_copy