openstef 3.4.78__py3-none-any.whl → 3.4.79__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstef/feature_engineering/holiday_features.py +34 -26
- openstef/model/regressors/median.py +5 -3
- {openstef-3.4.78.dist-info → openstef-3.4.79.dist-info}/METADATA +1 -1
- {openstef-3.4.78.dist-info → openstef-3.4.79.dist-info}/RECORD +7 -7
- {openstef-3.4.78.dist-info → openstef-3.4.79.dist-info}/WHEEL +0 -0
- {openstef-3.4.78.dist-info → openstef-3.4.79.dist-info}/licenses/LICENSE +0 -0
- {openstef-3.4.78.dist-info → openstef-3.4.79.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,8 @@
|
|
1
1
|
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
|
2
2
|
#
|
3
3
|
# SPDX-License-Identifier: MPL-2.0
|
4
|
-
"""This module contains all holiday related features."""
|
5
4
|
from datetime import datetime, timedelta
|
5
|
+
import collections
|
6
6
|
|
7
7
|
import holidays
|
8
8
|
import numpy as np
|
@@ -26,7 +26,6 @@ def generate_holiday_feature_functions(
|
|
26
26
|
2022-12-24 - 2023-01-08 is the 'Kerstvakantie'
|
27
27
|
2022-10-15 - 2022-10-23 is the 'HerfstvakantieNoord'
|
28
28
|
|
29
|
-
|
30
29
|
The holidays are based on a manually generated csv file.
|
31
30
|
The information is collected using:
|
32
31
|
https://www.schoolvakanties-nederland.nl/ and the python holiday function
|
@@ -44,7 +43,6 @@ def generate_holiday_feature_functions(
|
|
44
43
|
- Pinksteren
|
45
44
|
- Kerst
|
46
45
|
|
47
|
-
|
48
46
|
The 'Brugdagen' are updated until Dec 2020. (Generated using agenda)
|
49
47
|
|
50
48
|
Args:
|
@@ -83,23 +81,34 @@ def generate_holiday_feature_functions(
|
|
83
81
|
)
|
84
82
|
}
|
85
83
|
)
|
84
|
+
|
86
85
|
# Define empty list to keep track of bridgedays
|
87
86
|
bridge_days = []
|
88
|
-
|
87
|
+
|
88
|
+
# Group holiday dates by name
|
89
|
+
holiday_dates_by_name = collections.defaultdict(list)
|
89
90
|
for date, holiday_name in sorted(country_holidays.items()):
|
90
|
-
|
91
|
-
def make_holiday_func(requested_date):
|
92
|
-
return lambda x: np.isin(x.index.date, np.array([requested_date]))
|
91
|
+
holiday_dates_by_name[holiday_name].append(date)
|
93
92
|
|
94
|
-
|
93
|
+
# Create one function per holiday name that checks all dates for that holiday
|
94
|
+
for holiday_name, dates in holiday_dates_by_name.items():
|
95
|
+
# Use a default argument to capture the dates at definition time
|
95
96
|
holiday_functions.update(
|
96
|
-
{
|
97
|
+
{
|
98
|
+
"is_"
|
99
|
+
+ holiday_name.replace(
|
100
|
+
" ", "_"
|
101
|
+
).lower(): lambda x, dates_local=dates: np.isin(
|
102
|
+
x.index.date, np.array(dates_local)
|
103
|
+
)
|
104
|
+
}
|
97
105
|
)
|
98
106
|
|
99
|
-
# Check for bridge
|
100
|
-
|
101
|
-
|
102
|
-
|
107
|
+
# Check for bridge days for each date of this holiday
|
108
|
+
for date in dates:
|
109
|
+
holiday_functions, bridge_days = check_for_bridge_day(
|
110
|
+
date, holiday_name, country_code, years, holiday_functions, bridge_days
|
111
|
+
)
|
103
112
|
|
104
113
|
# Add feature function that includes all bridgedays
|
105
114
|
holiday_functions.update(
|
@@ -108,7 +117,7 @@ def generate_holiday_feature_functions(
|
|
108
117
|
|
109
118
|
# Add school holidays if country is NL
|
110
119
|
if country_code == "NL":
|
111
|
-
#
|
120
|
+
# Manually generated csv including all dutch schoolholidays for different regions
|
112
121
|
df_holidays = pd.read_csv(path_to_school_holidays_csv, index_col=None)
|
113
122
|
df_holidays["datum"] = pd.to_datetime(df_holidays.datum).apply(
|
114
123
|
lambda x: x.date()
|
@@ -125,19 +134,17 @@ def generate_holiday_feature_functions(
|
|
125
134
|
|
126
135
|
# Loop over list of holidays names
|
127
136
|
for holiday_name in list(set(df_holidays.name)):
|
128
|
-
#
|
129
|
-
def make_holiday_func(holidayname=holiday_name):
|
130
|
-
return lambda x: np.isin(
|
131
|
-
x.index.date,
|
132
|
-
df_holidays.datum[df_holidays.name == holidayname].values,
|
133
|
-
)
|
134
|
-
|
135
|
-
# Create lag function for each holiday
|
137
|
+
# Use the holidayname as a default argument to capture it at definition time
|
136
138
|
holiday_functions.update(
|
137
139
|
{
|
138
140
|
"is_"
|
139
|
-
+ holiday_name.replace(
|
140
|
-
|
141
|
+
+ holiday_name.replace(
|
142
|
+
" ", "_"
|
143
|
+
).lower(): lambda x, holiday_name_local=holiday_name: np.isin(
|
144
|
+
x.index.date,
|
145
|
+
df_holidays.datum[
|
146
|
+
df_holidays.name == holiday_name_local
|
147
|
+
].values,
|
141
148
|
)
|
142
149
|
}
|
143
150
|
)
|
@@ -178,9 +185,10 @@ def check_for_bridge_day(
|
|
178
185
|
if date in country_holidays:
|
179
186
|
return holiday_functions, bridge_days
|
180
187
|
|
181
|
-
# Define function
|
188
|
+
# Define function explicitly to mitigate 'late binding' problem
|
189
|
+
# Use a default argument to capture the date at definition time
|
182
190
|
def make_holiday_func(requested_date):
|
183
|
-
return lambda x: np.isin(x.index.date, np.array([
|
191
|
+
return lambda x, dt=requested_date: np.isin(x.index.date, np.array([dt]))
|
184
192
|
|
185
193
|
# Looking forward: If day after tomorrow is a national holiday or
|
186
194
|
# a saturday check if tomorrow is not a national holiday
|
@@ -304,9 +304,11 @@ class MedianRegressor(OpenstfRegressor, RegressorMixin):
|
|
304
304
|
|
305
305
|
Which lag features are used is determined by the feature engineering step.
|
306
306
|
"""
|
307
|
-
|
308
|
-
|
309
|
-
|
307
|
+
(
|
308
|
+
feature_names,
|
309
|
+
frequency,
|
310
|
+
feature_to_lags_in_min,
|
311
|
+
) = self._extract_and_validate_lags(x)
|
310
312
|
|
311
313
|
self.feature_names_ = list(feature_names)
|
312
314
|
self.frequency_ = frequency
|
@@ -27,7 +27,7 @@ openstef/feature_engineering/data_preparation.py,sha256=TXAPTtSmBRC_LZP7o5Jlmj7J
|
|
27
27
|
openstef/feature_engineering/feature_adder.py,sha256=aSqDl_gUrB3H2TD3cNvU5JniY_KOb4u4a2A6J7zB2BQ,6835
|
28
28
|
openstef/feature_engineering/feature_applicator.py,sha256=bU1Pu5V1fxMCQCwh6HG66nmctBjrNa7gHUYqOqPmLTU,7501
|
29
29
|
openstef/feature_engineering/general.py,sha256=PdvnDqkze31FggUuWHQ1ysroh_uDOa1hZ7NftMYH2_U,4130
|
30
|
-
openstef/feature_engineering/holiday_features.py,sha256=
|
30
|
+
openstef/feature_engineering/holiday_features.py,sha256=g3VBj9oU3wmp82iKcknX41S_7Z4tGIjlvgbZOcFqQaw,8572
|
31
31
|
openstef/feature_engineering/lag_features.py,sha256=Dr6qS8UhdgEHPZZSe-w6ibtjl_lcbcQohhqdZN9fqEU,5652
|
32
32
|
openstef/feature_engineering/missing_values_transformer.py,sha256=U8pdA61k8CRosO3yR2IsCy5C4Ka3c8BWCimDLIB4LCQ,5010
|
33
33
|
openstef/feature_engineering/rolling_features.py,sha256=V-UulqWKuSksFQAASyVSQim1stEA4TmtHNULCrrdgjo,2160
|
@@ -64,7 +64,7 @@ openstef/model/regressors/gblinear_quantile.py,sha256=PKQL_TAXa3Kw9oZrKC6Uvo_n2N
|
|
64
64
|
openstef/model/regressors/lgbm.py,sha256=zCdn1euEdSFxYJzH8XqQFFnb6R4JVUnmineKjX_Gy-g,800
|
65
65
|
openstef/model/regressors/linear.py,sha256=uOvZMLGZH_9nXfmS5honCMfyVeyGXP1Cza9A_BdXlVw,3665
|
66
66
|
openstef/model/regressors/linear_quantile.py,sha256=zIpGo9deMeTZdwFWoZ3FstX74mYdlAhfg-YOsPRFl0k,10534
|
67
|
-
openstef/model/regressors/median.py,sha256=
|
67
|
+
openstef/model/regressors/median.py,sha256=f_yZWuJXAUbGbHAIMqpIAFSaUi0GnEe55DgFWGo7S5U,14157
|
68
68
|
openstef/model/regressors/regressor.py,sha256=0um575rTEkzYb1E5IAOuTlsZDhmb7eI5byu5e062NRs,3469
|
69
69
|
openstef/model/regressors/xgb.py,sha256=uhV9Wm90aOkjByTm-O2xpt2kpANRxAqQvv5mA0H1uBc,1294
|
70
70
|
openstef/model/regressors/xgb_multioutput_quantile.py,sha256=xWzA7tymC_o-F1OS3I7vUKf9zP6RR1ZglEeY4NAgjU0,9146
|
@@ -104,8 +104,8 @@ openstef/tasks/utils/predictionjobloop.py,sha256=Ysy3zF5lzPMz_asYDKeF5m0qgVT3tCt
|
|
104
104
|
openstef/tasks/utils/taskcontext.py,sha256=O-LZ_wHEl5vbT8oB7EYtOeMkvk6EqCnI1-KiyER7Eu4,5407
|
105
105
|
openstef/validation/__init__.py,sha256=bIyGTSA4V5VoOLTwdaiJJAnozmpSzvQooVYlsf8H4eU,163
|
106
106
|
openstef/validation/validation.py,sha256=r6UqkdH5TMjsGfn8Ta07K1jkqmrVmwcPGfyQvMmZyO4,11459
|
107
|
-
openstef-3.4.
|
108
|
-
openstef-3.4.
|
109
|
-
openstef-3.4.
|
110
|
-
openstef-3.4.
|
111
|
-
openstef-3.4.
|
107
|
+
openstef-3.4.79.dist-info/licenses/LICENSE,sha256=7Pm2fWFFHHUG5lDHed1vl5CjzxObIXQglnYsEdtjo_k,14907
|
108
|
+
openstef-3.4.79.dist-info/METADATA,sha256=zfFVPR_RhCyKZ50LSCxuA46CI8L8d2tIJH02ryc9bUk,8834
|
109
|
+
openstef-3.4.79.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
110
|
+
openstef-3.4.79.dist-info/top_level.txt,sha256=kD0H4PqrQoncZ957FvqwfBxa89kTrun4Z_RAPs_HhLs,9
|
111
|
+
openstef-3.4.79.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|