ebm 0.99.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. ebm/__init__.py +0 -0
  2. ebm/__main__.py +152 -0
  3. ebm/__version__.py +1 -0
  4. ebm/cmd/__init__.py +0 -0
  5. ebm/cmd/calibrate.py +83 -0
  6. ebm/cmd/calibrate_excel_com_io.py +128 -0
  7. ebm/cmd/heating_systems_by_year.py +18 -0
  8. ebm/cmd/helpers.py +134 -0
  9. ebm/cmd/initialize.py +167 -0
  10. ebm/cmd/migrate.py +92 -0
  11. ebm/cmd/pipeline.py +227 -0
  12. ebm/cmd/prepare_main.py +174 -0
  13. ebm/cmd/result_handler.py +272 -0
  14. ebm/cmd/run_calculation.py +221 -0
  15. ebm/data/area.csv +92 -0
  16. ebm/data/area_new_residential_buildings.csv +3 -0
  17. ebm/data/area_per_person.csv +12 -0
  18. ebm/data/building_code_parameters.csv +9 -0
  19. ebm/data/energy_need_behaviour_factor.csv +6 -0
  20. ebm/data/energy_need_improvements.csv +7 -0
  21. ebm/data/energy_need_original_condition.csv +534 -0
  22. ebm/data/heating_system_efficiencies.csv +13 -0
  23. ebm/data/heating_system_forecast.csv +9 -0
  24. ebm/data/heating_system_initial_shares.csv +1113 -0
  25. ebm/data/holiday_home_energy_consumption.csv +24 -0
  26. ebm/data/holiday_home_stock.csv +25 -0
  27. ebm/data/improvement_building_upgrade.csv +9 -0
  28. ebm/data/new_buildings_residential.csv +32 -0
  29. ebm/data/population_forecast.csv +51 -0
  30. ebm/data/s_curve.csv +40 -0
  31. ebm/energy_consumption.py +307 -0
  32. ebm/extractors.py +115 -0
  33. ebm/heating_system_forecast.py +472 -0
  34. ebm/holiday_home_energy.py +341 -0
  35. ebm/migrations.py +224 -0
  36. ebm/model/__init__.py +0 -0
  37. ebm/model/area.py +403 -0
  38. ebm/model/bema.py +149 -0
  39. ebm/model/building_category.py +150 -0
  40. ebm/model/building_condition.py +78 -0
  41. ebm/model/calibrate_energy_requirements.py +84 -0
  42. ebm/model/calibrate_heating_systems.py +180 -0
  43. ebm/model/column_operations.py +157 -0
  44. ebm/model/construction.py +827 -0
  45. ebm/model/data_classes.py +223 -0
  46. ebm/model/database_manager.py +410 -0
  47. ebm/model/dataframemodels.py +115 -0
  48. ebm/model/defaults.py +30 -0
  49. ebm/model/energy_need.py +6 -0
  50. ebm/model/energy_need_filter.py +182 -0
  51. ebm/model/energy_purpose.py +115 -0
  52. ebm/model/energy_requirement.py +353 -0
  53. ebm/model/energy_use.py +202 -0
  54. ebm/model/enums.py +8 -0
  55. ebm/model/exceptions.py +4 -0
  56. ebm/model/file_handler.py +388 -0
  57. ebm/model/filter_scurve_params.py +83 -0
  58. ebm/model/filter_tek.py +152 -0
  59. ebm/model/heat_pump.py +53 -0
  60. ebm/model/heating_systems.py +20 -0
  61. ebm/model/heating_systems_parameter.py +17 -0
  62. ebm/model/heating_systems_projection.py +3 -0
  63. ebm/model/heating_systems_share.py +28 -0
  64. ebm/model/scurve.py +224 -0
  65. ebm/model/tek.py +1 -0
  66. ebm/s_curve.py +515 -0
  67. ebm/services/__init__.py +0 -0
  68. ebm/services/calibration_writer.py +262 -0
  69. ebm/services/console.py +106 -0
  70. ebm/services/excel_loader.py +66 -0
  71. ebm/services/files.py +38 -0
  72. ebm/services/spreadsheet.py +289 -0
  73. ebm/temp_calc.py +99 -0
  74. ebm/validators.py +565 -0
  75. ebm-0.99.5.dist-info/METADATA +212 -0
  76. ebm-0.99.5.dist-info/RECORD +80 -0
  77. ebm-0.99.5.dist-info/WHEEL +5 -0
  78. ebm-0.99.5.dist-info/entry_points.txt +3 -0
  79. ebm-0.99.5.dist-info/licenses/LICENSE +21 -0
  80. ebm-0.99.5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,341 @@
1
+ import logging
2
+ import typing
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+ from ebm.model.database_manager import DatabaseManager
8
+ from ebm.model.data_classes import YearRange
9
+
10
+
11
class HolidayHomeEnergy:
    """
    Projects holiday home energy usage per energy source.

    The instance holds the population series, the holiday home counts by category and the
    historical usage statistics for electricity, fuelwood and fossil fuel.
    """

    def __init__(self,
                 population: pd.Series,
                 holiday_homes_by_category: pd.DataFrame,
                 electricity_usage_stats: pd.Series,
                 fuelwood_usage_stats: pd.Series,
                 fossil_fuel_usage_stats: pd.Series):
        self.population = population
        self.holiday_homes_by_category = holiday_homes_by_category
        self.electricity_usage_stats = electricity_usage_stats
        self.fuelwood_usage_stats = fuelwood_usage_stats
        self.fossil_fuel_usage_stats = fossil_fuel_usage_stats

    def calculate_energy_usage(self) -> typing.Iterable[pd.Series]:
        """
        Lazily compute the energy usage series for holiday homes.

        Yields
        ------
        pd.Series
            Three series, in this order: electricity, fuelwood and fossil fuel. For electricity
            and fuelwood, values missing from the projection are filled in from the statistics
            with combine_first. The fossil fuel series is yielded exactly as returned by
            project_fossil_fuel_usage.
        """
        electricity = project_electricity_usage(self.electricity_usage_stats,
                                                self.holiday_homes_by_category,
                                                self.population)
        yield electricity.combine_first(self.electricity_usage_stats)

        fuelwood = project_fuelwood_usage(self.fuelwood_usage_stats,
                                          self.holiday_homes_by_category,
                                          self.population)
        yield fuelwood.combine_first(self.fuelwood_usage_stats)

        # No combine_first here: project_fossil_fuel_usage starts from the statistics
        # themselves (reindexed), so the historical values are already part of the result.
        yield project_fossil_fuel_usage(self.fossil_fuel_usage_stats,
                                        self.holiday_homes_by_category,
                                        self.population)

    @staticmethod
    def new_instance(database_manager: DatabaseManager = None) -> 'HolidayHomeEnergy':
        """
        Build a HolidayHomeEnergy from a DatabaseManager.

        Parameters
        ----------
        database_manager : DatabaseManager, optional
            Source of the input data. A default DatabaseManager() is created when omitted.

        Returns
        -------
        HolidayHomeEnergy
        """
        manager = database_manager or DatabaseManager()
        homes_by_year = manager.get_holiday_home_by_year()

        # 02 Electricity in holiday homes, statistics (GWh) (input)
        electricity_stats = manager.get_holiday_home_electricity_consumption()

        # 04 Fuelwood in holiday homes, statistics (GWh)
        fuelwood_stats = manager.get_holiday_home_fuelwood_consumption()

        # 06 Fossil fuel in holiday homes, statistics (GWh)
        fossil_fuel_stats = manager.get_holiday_home_fossilfuel_consumption()

        forecast = manager.file_handler.get_file(manager.file_handler.POPULATION_FORECAST)
        population_by_year = forecast.set_index('year').population

        return HolidayHomeEnergy(population_by_year,
                                 homes_by_year,
                                 electricity_stats,
                                 fuelwood_stats,
                                 fossil_fuel_stats)
76
+
77
+
78
def project_electricity_usage(electricity_usage_stats: pd.Series,
                              holiday_homes_by_category: pd.DataFrame,
                              population: pd.Series) -> pd.Series:
    """
    Project the electricity usage of holiday homes.

    The first two columns of holiday_homes_by_category are summed to a total number of holiday
    homes per year. That total is used both to derive the projected number of holiday homes
    (via people per holiday home) and to turn the usage statistics into usage per holiday home.
    The index of population decides which years are part of the projection.

    Parameters
    ----------
    electricity_usage_stats : pd.Series
        Historical electricity usage statistics.
    holiday_homes_by_category : pd.DataFrame
        Number of holiday homes by year. Only the first two columns are read.
    population : pd.Series
        Population by year.

    Returns
    -------
    pd.Series
        Series named 'gwh': projected holiday homes multiplied by the projected usage per
        holiday home, divided by 1 000 000.

    Raises
    ------
    ValueError
        Raised by projected_electricity_usage_holiday_homes when the padded usage series does
        not meet its requirements.
    """
    first_category = holiday_homes_by_category.iloc[:, 0]
    second_category = holiday_homes_by_category.iloc[:, 1]
    homes_per_year = sum_holiday_homes(first_category, second_category)

    persons_per_home = population_over_holiday_homes(population, homes_per_year)
    future_homes = projected_holiday_homes(population, persons_per_home)

    kwh_per_home = energy_usage_by_holiday_homes(electricity_usage_stats, homes_per_year)
    # Years that only exist in the population series become NaN and are what gets projected.
    padded_kwh_per_home = kwh_per_home.reindex(population.index, fill_value=np.nan)
    projected_kwh_per_home = projected_electricity_usage_holiday_homes(padded_kwh_per_home)

    usage_gwh = (future_homes * projected_kwh_per_home) / 1_000_000
    usage_gwh.name = 'gwh'
    return usage_gwh
123
+
124
+
125
def project_fuelwood_usage(fuelwood_usage_stats: pd.Series,
                           holiday_homes_by_category: pd.DataFrame,
                           population: pd.Series) -> pd.Series:
    """
    Project the fuelwood usage of holiday homes.

    Follows the same steps as the electricity projection, but the usage per holiday home is
    projected with projected_fuelwood_usage_holiday_homes.

    Parameters
    ----------
    fuelwood_usage_stats : pd.Series
        Historical fuelwood usage statistics.
    holiday_homes_by_category : pd.DataFrame
        Number of holiday homes by year. Only the first two columns are read.
    population : pd.Series
        Population by year. Its index decides which years are part of the projection.

    Returns
    -------
    pd.Series
        Series named 'gwh': projected holiday homes multiplied by the projected usage per
        holiday home, divided by 1 000 000.
    """
    first_category = holiday_homes_by_category.iloc[:, 0]
    second_category = holiday_homes_by_category.iloc[:, 1]
    homes_per_year = sum_holiday_homes(first_category, second_category)

    persons_per_home = population_over_holiday_homes(population, homes_per_year)
    future_homes = projected_holiday_homes(population, persons_per_home)

    kwh_per_home = energy_usage_by_holiday_homes(fuelwood_usage_stats, homes_per_year)
    # Years that only exist in the population series become NaN and are what gets projected.
    padded_kwh_per_home = kwh_per_home.reindex(population.index, fill_value=np.nan)
    projected_kwh_per_home = projected_fuelwood_usage_holiday_homes(padded_kwh_per_home)

    usage_gwh = (future_homes * projected_kwh_per_home) / 1_000_000
    usage_gwh.name = 'gwh'
    return usage_gwh
143
+
144
+
145
def project_fossil_fuel_usage(fossil_fuel_usage_stats: pd.Series,
                              holiday_homes_by_category: pd.DataFrame,
                              population: pd.Series) -> pd.Series:
    """
    Project the fossil fuel usage of holiday homes.

    The statistics are reindexed to the years of population. Every year after the last year
    with a value is set to the mean of all values present after the reindexing.

    Parameters
    ----------
    fossil_fuel_usage_stats : pd.Series
        Historical fossil fuel usage statistics.
    holiday_homes_by_category : pd.DataFrame
        Not used by this function.
    population : pd.Series
        Only the index is used; it decides which years are part of the result.

    Returns
    -------
    pd.Series
        Series named 'gwh' indexed like population.

    Raises
    ------
    ValueError
        From max() when no year of population has a fossil fuel value.
    """
    usage_gwh = fossil_fuel_usage_stats.reindex(population.index, fill_value=np.nan)

    has_value = ~usage_gwh.isna()
    years_with_value = usage_gwh.loc[has_value].index
    last_year_with_value = max(years_with_value)
    historical_mean = usage_gwh.loc[years_with_value].mean()

    usage_gwh.loc[usage_gwh.index > last_year_with_value] = historical_mean
    usage_gwh.name = 'gwh'
    return usage_gwh
155
+
156
+
157
def sum_holiday_homes(*holiday_homes: pd.Series) -> pd.Series:
    """
    Add up any number of holiday home series label by label.

    Each series becomes one row of a temporary DataFrame, which is then summed column-wise.
    """
    one_row_per_series = pd.DataFrame(holiday_homes)
    return one_row_per_series.sum(axis=0)
159
+
160
+
161
def population_over_holiday_homes(population: pd.Series,
                                  holiday_homes: pd.Series) -> pd.Series:
    """
    Number of people per holiday home.

    Parameters
    ----------
    population : pd.Series
    holiday_homes : pd.Series

    Returns
    -------
    pd.Series
        population divided by holiday_homes, aligned on the index.
    """
    return population.div(holiday_homes)
177
+
178
+
179
def projected_holiday_homes(population: pd.Series,
                            holiday_homes: pd.Series) -> pd.Series:
    """
    Projects the number of holiday homes from the population.

    Parameters
    ----------
    population : pd.Series
        population in every year of the projection
    holiday_homes : pd.Series
        series whose mean is used as divisor. The callers in this module pass the number of
        people per holiday home.

    Returns
    -------
    pd.Series
        population divided by the mean of holiday_homes
    """
    divisor = holiday_homes.mean()
    return population.div(divisor)
196
+
197
+
198
def energy_usage_by_holiday_homes(
    energy_usage: pd.Series,
    holiday_homes: pd.Series
) -> pd.Series:
    """
    Energy usage per holiday home.

    (08) 14 Electricity per holiday home, statistics (kWh) in Energibruk fritidsboliger.xlsx
    (10) 16 Fuelwood per holiday home, statistics (kWh) 2019 - 2023

    Parameters
    ----------
    energy_usage : pd.Series
        Electricity usage by year from SSB https://www.ssb.no/statbank/sq/10103348 2001 - 2023
    holiday_homes : pd.Series
        Total number of holiday homes of any category from SSB https://www.ssb.no/statbank/sq/10103336

    Returns
    -------
    pd.Series
        energy_usage multiplied by 1 000 000 and divided by holiday_homes. The series is
        named 'kwh'.
    """
    usage_per_home = energy_usage * 1_000_000 / holiday_homes
    return usage_per_home.rename('kwh')
220
+
221
+
222
def projected_fuelwood_usage_holiday_homes(historical_fuelwood_usage: pd.Series) -> pd.Series:
    """
    Projects future fuelwood usage for holiday homes based on historical data.

    The projected value is the mean of the last five non-NaN values of
    historical_fuelwood_usage.

    Parameters
    ----------
    historical_fuelwood_usage : pd.Series
        Usage by year, with NaN for the years that are to be projected.

    Returns
    -------
    pd.Series
        Series with the same index as historical_fuelwood_usage. Years after the last year with
        a value hold the projected usage, every other year is NaN.

    Raises
    ------
    ValueError
        From max() when historical_fuelwood_usage has no non-NaN value.
    """
    is_observed = ~historical_fuelwood_usage.isna()
    observed_years = historical_fuelwood_usage.loc[is_observed].index
    recent_average = historical_fuelwood_usage.loc[observed_years].iloc[-5:].mean()
    last_observed_year = max(observed_years)

    projection = pd.Series(data=[np.nan] * len(historical_fuelwood_usage),
                           index=historical_fuelwood_usage.index)
    projection.loc[projection.index > last_observed_year] = recent_average
    return projection
245
+
246
+
247
def projected_electricity_usage_holiday_homes(electricity_usage: pd.Series):
    """
    Project future electricity usage for holiday homes based on historical data.

    The projection starts from the value of the year 2019 and is built from three consecutive
    ranges: five years increasing by 75, four years increasing by 50 and eight years increasing
    by 25. Any remaining year repeats the last value of the third range. When fewer than 17
    years are missing, the ranges are cut short so the result always has the same length as
    electricity_usage.

    15 (09) Electricity per holiday home, projected (kWh) in Energibruk fritidsboliger.xlsx

    Parameters
    ----------
    electricity_usage : pd.Series
        A pandas Series containing historical electricity usage data. The index must include the year 2019,
        and the Series must have more than 40 entries with NaN for the years to project. The projected
        values are placed after the non-NaN entries, so the NaN values are expected at the end.

    Returns
    -------
    pd.Series
        A pandas Series named 'projected_electricity_usage_kwh' with the index of electricity_usage. The
        leading positions (one per non-NaN value in electricity_usage) are NaN, the remaining positions
        hold the projected usage.

    Raises
    ------
    ValueError
        If the year 2019 is not in the index of the provided Series.
        If there are no NaN values in the provided Series.
        If the length of the Series is less than or equal to 40.
    """
    if 2019 not in electricity_usage.index:
        msg = 'The required year 2019 is not in the index of electricity_usage for the electricity projection'
        raise ValueError(msg)
    if not electricity_usage.isna().any():
        raise ValueError('Expected empty energy_usage for projection')
    if len(electricity_usage.index) <= 40:
        raise ValueError('At least 41 years of electricity_usage is required to predict future electricity use')

    years_to_project = int(electricity_usage.isna().sum())
    left_pad_len = len(electricity_usage) - years_to_project

    initial_e_u = electricity_usage.loc[2019]
    first_range = [initial_e_u + (i * 75) for i in range(1, 6)]
    second_range = [first_range[-1] + (i * 50) for i in range(1, 5)]
    third_range = [second_range[-1] + (i * 25) for i in range(1, 9)]
    ramp = first_range + second_range + third_range

    # Hold the last ramp value for years beyond the ramp. With fewer missing years than ramp
    # values the slice truncates the ramp; a negative pad length used to make the values longer
    # than the index, which failed inside pd.Series with a length mismatch.
    right_padding = [ramp[-1]] * max(0, years_to_project - len(ramp))
    projected_values = (ramp + right_padding)[:years_to_project]

    return pd.Series(([np.nan] * left_pad_len) + projected_values,
                     name='projected_electricity_usage_kwh',
                     index=electricity_usage.index)
301
+
302
+
303
if __name__ == '__main__':
    # Print the three energy usage series when the module is run as a script.
    holiday_home_energy = HolidayHomeEnergy.new_instance()
    headings = ['electricity', 'fuelwood', 'fossil fuel']
    for energy_usage, h in zip(holiday_home_energy.calculate_energy_usage(), headings):
        print('====', h, '====')
        print(energy_usage)
308
+
309
+
310
def calculate_energy_use(database_manager: DatabaseManager) -> pd.DataFrame:
    """
    Calculates holiday home energy use from HolidayHomeEnergy.calculate_energy_usage()

    Parameters
    ----------
    database_manager : DatabaseManager

    Returns
    -------
    pd.DataFrame
        One row per energy source (electricity, fuelwood, fossil), indexed by
        building_category, energy_type and unit. The unit level holds the name of each
        series returned by calculate_energy_usage().
    """
    model = HolidayHomeEnergy.new_instance(database_manager=database_manager)
    electricity, fuelwood, fossil = model.calculate_energy_usage()

    # Each series becomes a row; the row label is the name of the series.
    table = pd.DataFrame(data=[electricity, fuelwood, fossil])
    table.insert(0, 'building_category', 'holiday_home')
    table.insert(1, 'energy_type', ['electricity', 'fuelwood', 'fossil'])

    table = table.reset_index().rename(columns={'index': 'unit'})
    return table.set_index(['building_category', 'energy_type', 'unit'])
332
+
333
+
334
def transform_holiday_homes_to_horizontal(df: pd.DataFrame) -> pd.DataFrame:
    """
    Reshape the holiday home energy use table for output.

    The index is reset, energy_type is renamed to energy_source, and only the columns
    building_category, energy_source and the years produced by YearRange(2020, 2050) are kept.
    The energy sources 'electricity' and 'fuelwood' are translated to 'Elektrisitet' and 'Bio';
    other values are left as they are. building_category is set to 'Fritidsboliger'.

    Parameters
    ----------
    df : pd.DataFrame

    Returns
    -------
    pd.DataFrame
    """
    def _translate_source(source):
        if source == 'electricity':
            return 'Elektrisitet'
        if source == 'fuelwood':
            return 'Bio'
        return source

    horizontal = df.reset_index().rename(columns={'energy_type': 'energy_source'})

    wanted = list(YearRange(2020, 2050)) + ['building_category', 'energy_source']
    unwanted = [column for column in horizontal.columns if column not in wanted]
    horizontal = horizontal.drop(columns=unwanted)

    horizontal['energy_source'] = horizontal['energy_source'].apply(_translate_source)
    horizontal['building_category'] = 'Fritidsboliger'
    return horizontal
ebm/migrations.py ADDED
@@ -0,0 +1,224 @@
1
+ import pathlib
2
+ from typing import Callable
3
+
4
+ import pandas as pd
5
+ from loguru import logger
6
+
7
+
8
def drop_unnamed(df: pd.DataFrame) -> pd.DataFrame:
    """
    Remove columns starting with 'Unnamed:' from a DataFrame, and log a warning if any are not sequential.

    Parameters
    ----------
    df : pandas.DataFrame
        The input DataFrame from which to drop 'Unnamed:' columns.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with 'Unnamed:' columns removed. When there is no such column the
        input DataFrame itself is returned.

    Notes
    -----
    A column is considered sequential if the difference between consecutive values is constant.
    A column with fewer than two non-missing differences (for example a single row) has nothing
    that can break the sequence and does not trigger the warning.
    Column labels that are not strings are never treated as 'Unnamed:' columns.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'Unnamed: 0': [0, 1, 2],
    ...     'Unnamed: 1': [5, 7, 9],
    ...     'data': [10, 20, 30]
    ... })
    >>> drop_unnamed(df)
       data
    0    10
    1    20
    2    30
    """
    # The isinstance guard keeps integer (or other non-string) labels from raising AttributeError.
    unnamed = [c for c in df.columns if isinstance(c, str) and c.startswith('Unnamed:')]
    if not unnamed:
        return df

    # More than one distinct step means the values do not form an evenly spaced sequence.
    not_sequential = [s for s in unnamed if df[s].diff().dropna().nunique() > 1]
    if not_sequential:
        msg = f'Columns {not_sequential} {"was" if len(not_sequential)==1 else "were"} not sequential'
        logger.warning(msg)
    # DataFrame.drop returns a new object, so the caller's DataFrame is left untouched.
    return df.drop(columns=unnamed)
51
+
52
+
53
def rename_columns(df: pd.DataFrame, translation: dict[str, str]) -> pd.DataFrame:
    """
    Rename columns in a DataFrame using a translation dictionary.

    Parameters
    ----------
    df : pandas.DataFrame
        The input DataFrame whose columns are to be renamed.
    translation : dict of str
        A dictionary mapping existing column names (keys) to new column names (values).
        Keys that are not columns of df are ignored.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with columns renamed according to the translation dictionary.
        If the translation dictionary is empty, or none of its keys is a column of df,
        the original DataFrame is returned unchanged.

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
    >>> rename_columns(data, {'A': 'Alpha', 'B': 'Beta'})
       Alpha  Beta
    0      1     3
    1      2     4
    """
    if not translation:
        logger.debug('No translation dictionary provided')
        return df
    columns_to_rename = {old: new for old, new in translation.items() if old in df.columns}
    if not columns_to_rename:
        logger.debug(f'None of columns {translation.keys()} found in the dataframe')
        return df
    # str() keeps the log line from failing on a matching column label that is not a string.
    logger.debug(f'Renaming columns: {", ".join(str(c) for c in columns_to_rename)}')
    # DataFrame.rename already returns a new object; no extra copy is needed.
    return df.rename(columns=columns_to_rename)
89
+
90
+
91
def drop_columns(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
    """
    Drop specified columns from a DataFrame with logging and validation.

    Parameters
    ----------
    df : pandas.DataFrame
        The input DataFrame from which columns will be dropped.
    columns : list of str
        A list of column names to drop from the DataFrame.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame with the specified columns removed. If columns is empty or none of
        the columns are found, the original DataFrame is returned unchanged.

    Logs
    ----
    - Logs a debug message if no columns are provided.
    - Logs a warning if any specified columns are not found in the DataFrame.
    - Logs a debug message listing the columns that will be dropped.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({'A': [1], 'B': [2], 'C': [3]})
    >>> drop_columns(df, ['B', 'D'])
    WARNING: Column ['D'] missing from dataframe
       A  C
    0  1  3
    """
    if not columns:
        logger.debug('No columns to drop')
        return df
    logger.debug(f'drop columns {columns}')
    not_found = [c for c in columns if c not in df.columns]
    found = [c for c in columns if c in df.columns]

    if not_found:
        # Singular for exactly one missing column, plural otherwise.
        plural = '' if len(not_found) == 1 else 's'
        msg = f'Column{plural} {not_found} missing from dataframe'
        logger.warning(msg)
    if not found:
        logger.debug('No columns to drop')
        return df
    # DataFrame.drop already returns a new object; no extra copy is needed.
    return df.drop(columns=found)
139
+
140
+
141
def translate_heating_system_efficiencies(df: pd.DataFrame) -> pd.DataFrame:
    """
    Translate and drop columns in heating_system_efficiencies.csv

    - Column names are translated from Norwegian to English with rename_columns
    - The columns 'Grunnlast', 'Spisslast', 'Ekstralast' and 'Tappevann' are removed with drop_columns

    The work is done on a copy of df.
    """
    norwegian_to_english = {
        "Grunnlast energivare": "base_load_energy_product",
        "Grunnlast virkningsgrad": "base_load_efficiency",
        "Grunnlast andel": "base_load_coverage",
        "Spisslast andel": "peak_load_coverage",
        "Spisslast energivare": "peak_load_energy_product",
        "Spisslast virkningsgrad": "peak_load_efficiency",
        "Ekstralast energivare": "tertiary_load_energy_product",
        "Ekstralast andel": "tertiary_load_coverage",
        "Ekstralast virkningsgrad": "tertiary_load_efficiency",
        "Tappevann energivare": "domestic_hot_water_energy_product",
        "Tappevann virkningsgrad": "domestic_hot_water_efficiency",
        "Kjoling virkningsgrad": "cooling_efficiency",
    }
    redundant_columns = ['Grunnlast', 'Spisslast', 'Ekstralast', 'Tappevann']

    translated = rename_columns(df.copy(), norwegian_to_english)
    return drop_columns(translated, redundant_columns)
170
+
171
+
172
def migrate_input_directory(directory: pathlib.Path, migration: Callable) -> None:
    """
    Apply a migration function to heating_system_efficiencies.csv in a directory.

    A legacy file named 'heating_systems_efficiencies.csv' is first renamed to
    'heating_system_efficiencies.csv', unless a file with the new name already exists. The
    file is then read with pandas, passed through migration and written back to the same path.

    Parameters
    ----------
    directory : pathlib.Path
        The path to the directory containing the input CSV file.
    migration : Callable[[pd.DataFrame], pd.DataFrame]
        A function that takes a pandas DataFrame and returns a transformed DataFrame.

    Raises
    ------
    FileNotFoundError
        If the expected input file does not exist or is not a file.
    Exception
        Errors from reading, transforming, or writing the file are not caught here.

    Notes
    -----
    - The transformed data overwrites the original file, written without the index.

    Examples
    --------
    >>> from pathlib import Path
    >>> migrate_input_directory(Path("data"), translate_heating_system_efficiencies)
    """
    logger.info(f'Migrating {directory} using {migration}')
    legacy_file = directory / 'heating_systems_efficiencies.csv'
    input_file = directory / 'heating_system_efficiencies.csv'

    legacy_present = legacy_file.is_file()
    if legacy_present and input_file.is_file():
        # Both names exist: keep the current file and leave the legacy file alone.
        logger.info(f'Found existing {input_file}')
    elif legacy_present:
        logger.debug(f'Rename {legacy_file.name} to {input_file.name}')
        legacy_file.rename(input_file)
        logger.success(f'Renamed {legacy_file.name} to {input_file.name}')

    if not input_file.exists():
        raise FileNotFoundError(f'{input_file} not found')
    if not input_file.is_file():
        raise FileNotFoundError(f'{input_file} is not a file')

    original = pd.read_csv(input_file)
    migrated = migration(original)
    migrated.to_csv(input_file, index=False)
    logger.success(f'Migrated {input_file}')
ebm/model/__init__.py ADDED
File without changes