ebm 0.99.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ebm/__init__.py +0 -0
- ebm/__main__.py +152 -0
- ebm/__version__.py +1 -0
- ebm/cmd/__init__.py +0 -0
- ebm/cmd/calibrate.py +83 -0
- ebm/cmd/calibrate_excel_com_io.py +128 -0
- ebm/cmd/heating_systems_by_year.py +18 -0
- ebm/cmd/helpers.py +134 -0
- ebm/cmd/initialize.py +167 -0
- ebm/cmd/migrate.py +92 -0
- ebm/cmd/pipeline.py +227 -0
- ebm/cmd/prepare_main.py +174 -0
- ebm/cmd/result_handler.py +272 -0
- ebm/cmd/run_calculation.py +221 -0
- ebm/data/area.csv +92 -0
- ebm/data/area_new_residential_buildings.csv +3 -0
- ebm/data/area_per_person.csv +12 -0
- ebm/data/building_code_parameters.csv +9 -0
- ebm/data/energy_need_behaviour_factor.csv +6 -0
- ebm/data/energy_need_improvements.csv +7 -0
- ebm/data/energy_need_original_condition.csv +534 -0
- ebm/data/heating_system_efficiencies.csv +13 -0
- ebm/data/heating_system_forecast.csv +9 -0
- ebm/data/heating_system_initial_shares.csv +1113 -0
- ebm/data/holiday_home_energy_consumption.csv +24 -0
- ebm/data/holiday_home_stock.csv +25 -0
- ebm/data/improvement_building_upgrade.csv +9 -0
- ebm/data/new_buildings_residential.csv +32 -0
- ebm/data/population_forecast.csv +51 -0
- ebm/data/s_curve.csv +40 -0
- ebm/energy_consumption.py +307 -0
- ebm/extractors.py +115 -0
- ebm/heating_system_forecast.py +472 -0
- ebm/holiday_home_energy.py +341 -0
- ebm/migrations.py +224 -0
- ebm/model/__init__.py +0 -0
- ebm/model/area.py +403 -0
- ebm/model/bema.py +149 -0
- ebm/model/building_category.py +150 -0
- ebm/model/building_condition.py +78 -0
- ebm/model/calibrate_energy_requirements.py +84 -0
- ebm/model/calibrate_heating_systems.py +180 -0
- ebm/model/column_operations.py +157 -0
- ebm/model/construction.py +827 -0
- ebm/model/data_classes.py +223 -0
- ebm/model/database_manager.py +410 -0
- ebm/model/dataframemodels.py +115 -0
- ebm/model/defaults.py +30 -0
- ebm/model/energy_need.py +6 -0
- ebm/model/energy_need_filter.py +182 -0
- ebm/model/energy_purpose.py +115 -0
- ebm/model/energy_requirement.py +353 -0
- ebm/model/energy_use.py +202 -0
- ebm/model/enums.py +8 -0
- ebm/model/exceptions.py +4 -0
- ebm/model/file_handler.py +388 -0
- ebm/model/filter_scurve_params.py +83 -0
- ebm/model/filter_tek.py +152 -0
- ebm/model/heat_pump.py +53 -0
- ebm/model/heating_systems.py +20 -0
- ebm/model/heating_systems_parameter.py +17 -0
- ebm/model/heating_systems_projection.py +3 -0
- ebm/model/heating_systems_share.py +28 -0
- ebm/model/scurve.py +224 -0
- ebm/model/tek.py +1 -0
- ebm/s_curve.py +515 -0
- ebm/services/__init__.py +0 -0
- ebm/services/calibration_writer.py +262 -0
- ebm/services/console.py +106 -0
- ebm/services/excel_loader.py +66 -0
- ebm/services/files.py +38 -0
- ebm/services/spreadsheet.py +289 -0
- ebm/temp_calc.py +99 -0
- ebm/validators.py +565 -0
- ebm-0.99.5.dist-info/METADATA +212 -0
- ebm-0.99.5.dist-info/RECORD +80 -0
- ebm-0.99.5.dist-info/WHEEL +5 -0
- ebm-0.99.5.dist-info/entry_points.txt +3 -0
- ebm-0.99.5.dist-info/licenses/LICENSE +21 -0
- ebm-0.99.5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,341 @@
|
|
1
|
+
import logging
|
2
|
+
import typing
|
3
|
+
|
4
|
+
import numpy as np
|
5
|
+
import pandas as pd
|
6
|
+
|
7
|
+
from ebm.model.database_manager import DatabaseManager
|
8
|
+
from ebm.model.data_classes import YearRange
|
9
|
+
|
10
|
+
|
11
|
+
class HolidayHomeEnergy:
    """
    Projection of holiday home energy use for electricity, fuelwood and fossil fuel.

    The instance only stores its inputs; the work is done by the module level
    ``project_*`` functions when ``calculate_energy_usage`` is iterated.
    """

    def __init__(self,
                 population: pd.Series,
                 holiday_homes_by_category: pd.DataFrame,
                 electricity_usage_stats: pd.Series,
                 fuelwood_usage_stats: pd.Series,
                 fossil_fuel_usage_stats: pd.Series):
        """
        Store the input series and frame used by the projections.

        Parameters
        ----------
        population : pd.Series
            Population; its index is passed on as the set of years to project for.
        holiday_homes_by_category : pd.DataFrame
            Number of holiday homes, one column per category.
        electricity_usage_stats : pd.Series
            Historical electricity usage.
        fuelwood_usage_stats : pd.Series
            Historical fuelwood usage.
        fossil_fuel_usage_stats : pd.Series
            Historical fossil fuel usage.
        """
        self.population = population
        self.holiday_homes_by_category = holiday_homes_by_category
        self.electricity_usage_stats = electricity_usage_stats
        self.fuelwood_usage_stats = fuelwood_usage_stats
        self.fossil_fuel_usage_stats = fossil_fuel_usage_stats

    def calculate_energy_usage(self) -> typing.Iterable[pd.Series]:
        """
        Calculate projected energy usage for holiday homes.

        Yields
        ------
        pd.Series
            Three series, in this order: electricity, fuelwood, fossil fuel.
            For electricity and fuelwood, values missing from the projection are
            filled from the corresponding statistics. The fossil fuel series is
            yielded as returned by ``project_fossil_fuel_usage``.
        """
        electricity = project_electricity_usage(self.electricity_usage_stats,
                                                self.holiday_homes_by_category,
                                                self.population)
        yield electricity.combine_first(self.electricity_usage_stats)

        fuelwood = project_fuelwood_usage(self.fuelwood_usage_stats,
                                          self.holiday_homes_by_category,
                                          self.population)
        yield fuelwood.combine_first(self.fuelwood_usage_stats)

        # No combine_first here: the fossil fuel projection is yielded unchanged.
        yield project_fossil_fuel_usage(self.fossil_fuel_usage_stats,
                                        self.holiday_homes_by_category,
                                        self.population)

    @staticmethod
    def new_instance(database_manager: DatabaseManager = None) -> 'HolidayHomeEnergy':
        """
        Build a HolidayHomeEnergy from the inputs provided by a DatabaseManager.

        Parameters
        ----------
        database_manager : DatabaseManager, optional
            Source of the input data. A new ``DatabaseManager()`` is created when
            the argument is missing or falsy.

        Returns
        -------
        HolidayHomeEnergy
        """
        manager = database_manager if database_manager else DatabaseManager()
        stock = manager.get_holiday_home_by_year()

        # 02 Electricity in holiday homes, statistics (GWh) (input)
        electricity = manager.get_holiday_home_electricity_consumption()

        # 04 Fuelwood in holiday homes, statistics (GWh)
        fuelwood = manager.get_holiday_home_fuelwood_consumption()

        # 06 Fossil fuel in holiday homes, statistics (GWh)
        fossil_fuel = manager.get_holiday_home_fossilfuel_consumption()

        forecast = manager.file_handler.get_file(manager.file_handler.POPULATION_FORECAST)
        inhabitants = forecast.set_index('year').population

        return HolidayHomeEnergy(population=inhabitants,
                                 holiday_homes_by_category=stock,
                                 electricity_usage_stats=electricity,
                                 fuelwood_usage_stats=fuelwood,
                                 fossil_fuel_usage_stats=fossil_fuel)
|
76
|
+
|
77
|
+
|
78
|
+
def project_electricity_usage(electricity_usage_stats: pd.Series,
                              holiday_homes_by_category: pd.DataFrame,
                              population: pd.Series) -> pd.Series:
    """
    Project electricity usage for holiday homes.

    The projected number of holiday homes is multiplied by the projected
    electricity usage per holiday home, and the product is divided by 1 000 000
    and named 'gwh'. The index of ``population`` decides which years are part
    of the projection.

    Parameters
    ----------
    electricity_usage_stats : pd.Series
        Historical electricity usage statistics.
    holiday_homes_by_category : pd.DataFrame
        Number of holiday homes by year. Only the first two columns are read and
        summed into a total.
    population : pd.Series
        Population by year.

    Returns
    -------
    pd.Series
        Projected electricity usage named 'gwh'.

    Raises
    ------
    ValueError
        Propagated from projected_electricity_usage_holiday_homes when the usage
        series does not meet its requirements.
    """
    first_category = holiday_homes_by_category.iloc[:, 0]
    second_category = holiday_homes_by_category.iloc[:, 1]
    homes_per_year = sum_holiday_homes(first_category, second_category)

    # Future homes = population / mean(population per holiday home).
    persons_per_home = population_over_holiday_homes(population, homes_per_year)
    homes_projection = projected_holiday_homes(population, persons_per_home)

    # Usage per home, stretched with NaN to cover every year in population.
    per_home = energy_usage_by_holiday_homes(electricity_usage_stats, homes_per_year)
    per_home_all_years = per_home.reindex(population.index, fill_value=np.nan)
    per_home_projection = projected_electricity_usage_holiday_homes(per_home_all_years)

    usage_gwh = (homes_projection * per_home_projection) / 1_000_000
    usage_gwh.name = 'gwh'
    return usage_gwh
|
123
|
+
|
124
|
+
|
125
|
+
def project_fuelwood_usage(fuelwood_usage_stats: pd.Series,
                           holiday_homes_by_category: pd.DataFrame,
                           population: pd.Series) -> pd.Series:
    """
    Project fuelwood usage for holiday homes.

    The projected number of holiday homes is multiplied by the projected
    fuelwood usage per holiday home, and the product is divided by 1 000 000
    and named 'gwh'. The index of ``population`` decides which years are part
    of the projection.

    Parameters
    ----------
    fuelwood_usage_stats : pd.Series
        Historical fuelwood usage statistics.
    holiday_homes_by_category : pd.DataFrame
        Number of holiday homes by year. Only the first two columns are read and
        summed into a total.
    population : pd.Series
        Population by year.

    Returns
    -------
    pd.Series
        Projected fuelwood usage named 'gwh'.
    """
    first_category = holiday_homes_by_category.iloc[:, 0]
    second_category = holiday_homes_by_category.iloc[:, 1]
    homes_per_year = sum_holiday_homes(first_category, second_category)

    # Future homes = population / mean(population per holiday home).
    persons_per_home = population_over_holiday_homes(population, homes_per_year)
    homes_projection = projected_holiday_homes(population, persons_per_home)

    # Usage per home, stretched with NaN to cover every year in population.
    per_home = energy_usage_by_holiday_homes(fuelwood_usage_stats, homes_per_year)
    per_home_all_years = per_home.reindex(population.index, fill_value=np.nan)
    per_home_projection = projected_fuelwood_usage_holiday_homes(per_home_all_years)

    usage_gwh = (homes_projection * per_home_projection) / 1_000_000
    usage_gwh.name = 'gwh'
    return usage_gwh
|
143
|
+
|
144
|
+
|
145
|
+
def project_fossil_fuel_usage(fossil_fuel_usage_stats: pd.Series,
                              holiday_homes_by_category: pd.DataFrame,
                              population: pd.Series) -> pd.Series:
    """
    Project fossil fuel usage for holiday homes.

    The statistics are reindexed to the years in ``population``. Every year after
    the last year with a value is set to the mean of all values that remain after
    the reindexing. Years before that keep their value (or NaN when missing).

    Parameters
    ----------
    fossil_fuel_usage_stats : pd.Series
        Historical fossil fuel usage indexed by year.
    holiday_homes_by_category : pd.DataFrame
        Unused by this function.
    population : pd.Series
        Population by year. Only the index is used.

    Returns
    -------
    pd.Series
        Historical and projected fossil fuel usage named 'gwh'.

    Raises
    ------
    ValueError
        If fossil_fuel_usage_stats has no value for any year in population.
    """
    projected = fossil_fuel_usage_stats.reindex(population.index, fill_value=np.nan)

    observed = projected.dropna()
    if observed.empty:
        # Without this guard max() fails with "max() arg is an empty sequence".
        msg = 'fossil_fuel_usage_stats has no historical values for the years in population'
        raise ValueError(msg)

    # The mean must be taken before the assignment so projected years do not affect it.
    historical_mean = observed.mean()
    projected.loc[projected.index > observed.index.max()] = historical_mean
    projected.name = 'gwh'
    return projected
|
155
|
+
|
156
|
+
|
157
|
+
def sum_holiday_homes(*holiday_homes: pd.Series) -> pd.Series:
    """
    Sum any number of holiday home series label by label.

    Parameters
    ----------
    *holiday_homes : pd.Series
        Series to add together. Each series becomes one row of an intermediate
        DataFrame.

    Returns
    -------
    pd.Series
        The column-wise sum of the stacked series.
    """
    one_row_per_series = pd.DataFrame(holiday_homes)
    return one_row_per_series.sum(axis=0)
|
159
|
+
|
160
|
+
|
161
|
+
def population_over_holiday_homes(population: pd.Series,
                                  holiday_homes: pd.Series) -> pd.Series:
    """
    Population divided by the number of holiday homes.

    Parameters
    ----------
    population : pd.Series
        Population by year.
    holiday_homes : pd.Series
        Number of holiday homes by year.

    Returns
    -------
    pd.Series
        ``population / holiday_homes``, i.e. people per holiday home.
    """
    people_per_home = population / holiday_homes
    return people_per_home
|
177
|
+
|
178
|
+
|
179
|
+
def projected_holiday_homes(population: pd.Series,
                            holiday_homes: pd.Series) -> pd.Series:
    """
    Project the number of holiday homes from the population.

    Parameters
    ----------
    population : pd.Series
        Population in every year of the projection.
    holiday_homes : pd.Series
        Series whose mean is used as the divisor. The callers in this module
        pass the result of population_over_holiday_homes (people per holiday
        home) here.

    Returns
    -------
    pd.Series
        ``population`` divided by the mean of ``holiday_homes``.
    """
    divisor = holiday_homes.mean()
    return population / divisor
|
196
|
+
|
197
|
+
|
198
|
+
def energy_usage_by_holiday_homes(
    energy_usage: pd.Series,
    holiday_homes: pd.Series
) -> pd.Series:
    """
    Energy usage per holiday home.

    Corresponds to these sheets in Energibruk fritidsboliger.xlsx:
    (08) 14 electricity per holiday home, statistics (kWh)
    (10) 16 fuelwood per holiday home, statistics (kWh) 2019 - 2023

    Parameters
    ----------
    energy_usage : pd.Series
        Energy usage by year, e.g. electricity from SSB
        https://www.ssb.no/statbank/sq/10103348 2001 - 2023
    holiday_homes : pd.Series
        Total number of holiday homes of any category from SSB
        https://www.ssb.no/statbank/sq/10103336

    Returns
    -------
    pd.Series
        ``energy_usage * 1_000_000 / holiday_homes`` named 'kwh'.
    """
    scaled_usage = energy_usage * 1_000_000
    usage_per_home = scaled_usage / holiday_homes
    usage_per_home.name = 'kwh'
    return usage_per_home
|
220
|
+
|
221
|
+
|
222
|
+
def projected_fuelwood_usage_holiday_homes(historical_fuelwood_usage: pd.Series) -> pd.Series:
    """
    Project future fuelwood usage for holiday homes based on historical data.

    The projection is the mean of the last five non-NaN values of
    historical_fuelwood_usage, applied to every index label greater than the
    last label that has a value.

    Parameters
    ----------
    historical_fuelwood_usage : pd.Series
        Fuelwood usage by year, with NaN for the years to project.

    Returns
    -------
    pd.Series
        A series with the same index as historical_fuelwood_usage. Labels after
        the last historical value hold the projected usage; all other labels
        are NaN.

    Raises
    ------
    ValueError
        If historical_fuelwood_usage contains no values.
    """
    observed = historical_fuelwood_usage.dropna()
    if observed.empty:
        # Without this guard max() fails with "max() arg is an empty sequence".
        msg = 'historical_fuelwood_usage contains no values to base the projection on'
        raise ValueError(msg)

    projection = pd.Series(np.nan, index=historical_fuelwood_usage.index)
    recent_average = observed.iloc[-5:].mean()
    projection.loc[projection.index > observed.index.max()] = recent_average
    return projection
|
245
|
+
|
246
|
+
|
247
|
+
def projected_electricity_usage_holiday_homes(electricity_usage: pd.Series):
    """
    Project future electricity usage per holiday home from historical data.

    Starting at the value for 2019 the usage is raised in three consecutive
    ramps: 5 steps of 75, then 4 steps of 50, then 8 steps of 25. Any positions
    left after the ramps repeat the last ramp value.

    15 (09) electricity per holiday home, projected (kWh) in Energibruk fritidsboliger.xlsx

    Parameters
    ----------
    electricity_usage : pd.Series
        Electricity usage by year. The index must include the year 2019, and the
        series must have more than 40 entries of which at least one is NaN.

    Returns
    -------
    pd.Series
        Series named 'projected_electricity_usage_kwh' with the same index as
        electricity_usage. The leading positions, as many as there are non-NaN
        values in electricity_usage, are NaN; the rest hold the projection.
        NOTE(review): this presumes the non-NaN values are the leading entries
        of the series - confirm against the callers.

    Raises
    ------
    ValueError
        If the year 2019 is not in the index of the provided Series.
        If there are no NaN values in the provided Series.
        If the length of the Series is less than or equal to 40.
    """
    if 2019 not in electricity_usage.index:
        msg = 'The required year 2019 is not in the index of electricity_usage for the electricity projection'
        raise ValueError(msg)
    if not any(electricity_usage.isna()):
        raise ValueError('Expected empty energy_usage for projection')
    if len(electricity_usage.index) <= 40:
        raise ValueError('At least 41 years of electricity_usage is required to predict future electricity use')

    total_len = len(electricity_usage)
    known_len = total_len - electricity_usage.isna().sum()

    # Each ramp starts from the last value of the previous one (2019 for the first).
    ramp_values = []
    level = electricity_usage[2019]
    for increment, step_count in ((75, 5), (50, 4), (25, 8)):
        ramp_values.extend([level + (i * increment) for i in range(1, step_count + 1)])
        level = ramp_values[-1]

    # Hold the final ramp value for whatever remains of the index.
    tail_len = total_len - known_len - len(ramp_values)
    values = ([np.nan] * known_len) + ramp_values + ([ramp_values[-1]] * tail_len)

    return pd.Series(values,
                     name='projected_electricity_usage_kwh',
                     index=electricity_usage.index)
|
301
|
+
|
302
|
+
|
303
|
+
if __name__ == '__main__':
    # Manual run: print each projected series under a heading naming its energy type.
    holiday_home_energy = HolidayHomeEnergy.new_instance()
    headings = ['electricity', 'fuelwood', 'fossil fuel']
    for energy_usage, h in zip(holiday_home_energy.calculate_energy_usage(), headings):
        print('====', h, '====')
        print(energy_usage)
|
308
|
+
|
309
|
+
|
310
|
+
def calculate_energy_use(database_manager: DatabaseManager) -> pd.DataFrame:
    """
    Calculate holiday home energy use with HolidayHomeEnergy.calculate_energy_usage().

    Parameters
    ----------
    database_manager : DatabaseManager
        Passed on to HolidayHomeEnergy.new_instance.

    Returns
    -------
    pd.DataFrame
        One row each for electricity, fuelwood and fossil, indexed by
        building_category ('holiday_home'), energy_type and unit. The unit level
        comes from the row labels of the frame built from the three series
        (presumably their names - verify against the caller).
    """
    model = HolidayHomeEnergy.new_instance(database_manager=database_manager)
    electricity, fuelwood, fossil = model.calculate_energy_usage()

    table = pd.DataFrame(data=[electricity, fuelwood, fossil])
    table.insert(0, 'building_category', 'holiday_home')
    table.insert(1, 'energy_type', ['electricity', 'fuelwood', 'fossil'])

    # The former row labels come back as a column called 'index'; expose them as 'unit'.
    table = table.reset_index().rename(columns={'index': 'unit'})
    return table.set_index(['building_category', 'energy_type', 'unit'])
|
332
|
+
|
333
|
+
|
334
|
+
def transform_holiday_homes_to_horizontal(df: pd.DataFrame) -> pd.DataFrame:
    """
    Reshape holiday home energy use into a flat table with one column per year.

    Parameters
    ----------
    df : pd.DataFrame
        Energy use with an 'energy_type' index level or column.

    Returns
    -------
    pd.DataFrame
        The columns building_category and energy_source plus those columns that
        match a year in YearRange(2020, 2050). 'electricity' is labelled
        'Elektrisitet', 'fuelwood' is labelled 'Bio', and building_category is
        set to 'Fritidsboliger'.
    """
    def to_energy_source(energy_type):
        # Other values (e.g. fossil) pass through unchanged.
        if energy_type == 'electricity':
            return 'Elektrisitet'
        if energy_type == 'fuelwood':
            return 'Bio'
        return energy_type

    flat = df.reset_index().rename(columns={'energy_type': 'energy_source'})

    wanted = list(YearRange(2020, 2050)) + ['building_category', 'energy_source']
    unwanted = [column for column in flat.columns if column not in wanted]
    flat = flat.drop(columns=unwanted)

    flat['energy_source'] = flat['energy_source'].apply(to_energy_source)
    flat['building_category'] = 'Fritidsboliger'
    return flat
|
ebm/migrations.py
ADDED
@@ -0,0 +1,224 @@
|
|
1
|
+
import pathlib
|
2
|
+
from typing import Callable
|
3
|
+
|
4
|
+
import pandas as pd
|
5
|
+
from loguru import logger
|
6
|
+
|
7
|
+
|
8
|
+
def drop_unnamed(df: pd.DataFrame) -> pd.DataFrame:
    """
    Remove columns starting with 'Unnamed:' from a DataFrame, and log a warning if any are not sequential.

    Parameters
    ----------
    df : pandas.DataFrame
        The input DataFrame from which to drop 'Unnamed:' columns. Column labels
        that are not strings are never treated as unnamed.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with 'Unnamed:' columns removed. When there are no such
        columns the input DataFrame itself is returned.

    Notes
    -----
    A column is considered sequential if the difference between consecutive values is constant.
    Columns with fewer than two consecutive differences have nothing to compare and are
    treated as sequential. If any 'Unnamed:' columns are found to be non-sequential,
    a warning is logged.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'Unnamed: 0': [0, 1, 2],
    ...     'Unnamed: 1': [5, 7, 9],
    ...     'data': [10, 20, 30]
    ... })
    >>> drop_unnamed(df)
       data
    0    10
    1    20
    2    30
    """
    # Labels may be non-strings (e.g. integer years); calling startswith on them would raise.
    unnamed = [c for c in df.columns if isinstance(c, str) and c.startswith('Unnamed:')]
    if not unnamed:
        return df

    # More than one distinct step between consecutive values means uneven spacing.
    not_sequential = [s for s in unnamed if df[s].diff().dropna().nunique() > 1]
    if not_sequential:
        msg = f'Columns {not_sequential} {"was" if len(not_sequential)==1 else "were"} not sequential'
        logger.warning(msg)
    # drop() returns a new frame, so the input is left untouched without an explicit copy.
    return df.drop(columns=unnamed)
|
51
|
+
|
52
|
+
|
53
|
+
def rename_columns(df: pd.DataFrame, translation: dict[str, str]) -> pd.DataFrame:
    """
    Rename columns in a DataFrame using a translation dictionary.

    Parameters
    ----------
    df : pandas.DataFrame
        The input DataFrame whose columns are to be renamed.
    translation : dict of str
        A dictionary mapping existing column names (keys) to new column names (values).
        Keys that are not columns of df are ignored.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with columns renamed according to the translation dictionary.
        If the translation dictionary is empty, or none of its keys is a column of df,
        the original DataFrame is returned unchanged.

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
    >>> rename_columns(data, {'A': 'Alpha', 'B': 'Beta'})
       Alpha  Beta
    0      1     3
    1      2     4
    """
    if not translation:
        logger.debug('No translation dictionary provided')
        return df
    columns_to_rename = {k: v for k, v in translation.items() if k in df.columns}
    if not columns_to_rename:
        logger.debug(f'None of columns {translation.keys()} found in the dataframe')
        return df
    logger.debug(f'Renaming columns: {", ".join(columns_to_rename.keys())}')
    # rename() returns a new frame, so no explicit copy of the input is needed.
    return df.rename(columns=columns_to_rename)
|
89
|
+
|
90
|
+
|
91
|
+
def drop_columns(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
    """
    Drop specified columns from a DataFrame with logging and validation.

    Parameters
    ----------
    df : pandas.DataFrame
        The input DataFrame from which columns will be dropped.
    columns : list of str
        A list of column names to drop from the DataFrame.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with the specified columns removed. If none of the columns
        are found, the original DataFrame is returned unchanged.

    Logs
    ----
    - Logs a debug message if no columns are provided.
    - Logs a warning if any specified columns are not found in the DataFrame.
    - Logs a debug message listing the columns that will be dropped.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({'A': [1], 'B': [2], 'C': [3]})
    >>> drop_columns(df, ['B', 'D'])
    WARNING: Column ['D'] missing from dataframe
       A  C
    0  1  3
    """
    if not columns:
        logger.debug('No columns to drop')
        return df
    logger.debug(f'drop columns {columns}')
    not_found = [c for c in columns if c not in df.columns]
    found = [c for c in columns if c in df.columns]

    if not_found:
        # "Column" for exactly one missing column, "Columns" otherwise.
        plural = '' if len(not_found) == 1 else 's'
        msg = f'Column{plural} {not_found} missing from dataframe'
        logger.warning(msg)
    if not found:
        logger.debug('No columns to drop')
        return df
    # drop() returns a new frame, so no explicit copy of the input is needed.
    return df.drop(columns=found)
|
139
|
+
|
140
|
+
|
141
|
+
def translate_heating_system_efficiencies(df: pd.DataFrame) -> pd.DataFrame:
    """
    Translate and drop columns in heating_system_efficiencies.csv

    - Column names are translated from Norwegian to English with rename_columns
    - The columns Grunnlast, Spisslast, Ekstralast and Tappevann are removed
      with drop_columns

    Parameters
    ----------
    df : pandas.DataFrame
        Contents of heating_system_efficiencies.csv. The frame is copied before
        it is changed.

    Returns
    -------
    pandas.DataFrame
        The translated frame.
    """
    norwegian_to_english = {
        "Grunnlast energivare": "base_load_energy_product",
        "Grunnlast virkningsgrad": "base_load_efficiency",
        "Grunnlast andel": "base_load_coverage",
        "Spisslast andel": "peak_load_coverage",
        "Spisslast energivare": "peak_load_energy_product",
        "Spisslast virkningsgrad": "peak_load_efficiency",
        "Ekstralast energivare": "tertiary_load_energy_product",
        "Ekstralast andel": "tertiary_load_coverage",
        "Ekstralast virkningsgrad": "tertiary_load_efficiency",
        "Tappevann energivare": "domestic_hot_water_energy_product",
        "Tappevann virkningsgrad": "domestic_hot_water_efficiency",
        "Kjoling virkningsgrad": "cooling_efficiency",
    }
    redundant = ['Grunnlast', 'Spisslast', 'Ekstralast', 'Tappevann']

    translated = rename_columns(df.copy(), norwegian_to_english)
    return drop_columns(translated, redundant)
|
170
|
+
|
171
|
+
|
172
|
+
def migrate_input_directory(directory: pathlib.Path, migration: Callable) -> None:
    """
    Migrate heating_system_efficiencies.csv in a directory with a transformation function.

    A legacy file is renamed when needed, the input file is checked, read, passed
    through ``migration`` and written back to the same path.

    Parameters
    ----------
    directory : pathlib.Path
        The path to the directory containing the input CSV file.
    migration : Callable[[pd.DataFrame], pd.DataFrame]
        A function that takes a pandas DataFrame and returns a transformed DataFrame.

    Raises
    ------
    FileNotFoundError
        If the expected input file does not exist or is not a file.
    Exception
        Whatever reading, transforming or writing the file raises is not caught here.

    Notes
    -----
    - If a legacy file named 'heating_systems_efficiencies.csv' exists and the target file
      'heating_system_efficiencies.csv' does not, the legacy file is renamed to the target.
      If both exist the legacy file is left alone.
    - The result overwrites the original file and is written without the index.

    Examples
    --------
    >>> from pathlib import Path
    >>> migrate_input_directory(Path("data"), translate_heating_system_efficiencies)
    """
    logger.info(f'Migrating {directory} using {migration}')
    legacy_file = directory / 'heating_systems_efficiencies.csv'
    target_file = directory / 'heating_system_efficiencies.csv'

    if legacy_file.is_file():
        if not target_file.is_file():
            logger.debug(f'Rename {legacy_file.name} to {target_file.name}')
            legacy_file.rename(target_file)
            logger.success(f'Renamed {legacy_file.name} to {target_file.name}')
        else:
            logger.info(f'Found existing {target_file}')

    if not target_file.exists():
        raise FileNotFoundError(f'{target_file} not found')
    if not target_file.is_file():
        raise FileNotFoundError(f'{target_file} is not a file')

    migrated = migration(pd.read_csv(target_file))
    migrated.to_csv(target_file, index=False)
    logger.success(f'Migrated {target_file}')
|
ebm/model/__init__.py
ADDED
File without changes
|