pygazpar 0.1.21__py3-none-any.whl → 1.3.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pygazpar/datasource.py ADDED
@@ -0,0 +1,629 @@
1
+ import glob
2
+ import http.cookiejar
3
+ import json
4
+ import logging
5
+ import os
6
+ import time
7
+ from abc import ABC, abstractmethod
8
+ from datetime import date, timedelta
9
+ from typing import Any, Dict, List, Optional, cast
10
+
11
+ import pandas as pd
12
+ from requests import Session
13
+
14
+ from pygazpar.enum import Frequency, PropertyName
15
+ from pygazpar.excelparser import ExcelParser
16
+ from pygazpar.jsonparser import JsonParser
17
+
18
# ---- GRDF web authentication endpoints and module-level aliases ----

# Okta authentication endpoint: exchanges username/password for a one-time session token.
SESSION_TOKEN_URL = "https://connexion.grdf.fr/api/v1/authn"
# JSON body template for the session-token request, filled via str.format(username, password).
# NOTE(review): credentials are interpolated textually; a value containing '"' or '\'
# would produce invalid JSON — confirm whether GRDF credentials can contain those characters.
SESSION_TOKEN_PAYLOAD = """{{
"username": "{0}",
"password": "{1}",
"options": {{
"multiOptionalFactorEnroll": "false",
"warnBeforePasswordExpired": "false"
}}
}}"""

# Endpoint that converts the session token into an authenticated cookie session.
AUTH_TOKEN_URL = "https://connexion.grdf.fr/login/sessionCookieRedirect"
# Query-parameter template for the cookie-redirect request, filled via
# str.format(session_token) and parsed back with json.loads before sending.
AUTH_TOKEN_PARAMS = """{{
"checkAccountSetupComplete": "true",
"token": "{0}",
"redirectUrl": "https://monespace.grdf.fr"
}}"""

# Module-level logger.
Logger = logging.getLogger(__name__)

# One meter reading: a property-name -> value mapping (see pygazpar.enum.PropertyName).
MeterReading = Dict[str, Any]

# All readings for one frequency.
MeterReadings = List[MeterReading]

# Readings keyed by frequency name (Frequency.value), e.g. {"daily": [...]}.
MeterReadingsByFrequency = Dict[str, MeterReadings]
42
+
43
+
44
+ # ------------------------------------------------------------------------------------------------------------
45
# ------------------------------------------------------------------------------------------------------------
class IDataSource(ABC):  # pylint: disable=too-few-public-methods
    """Contract shared by every meter-reading provider (web, Excel file, JSON file, test)."""

    @abstractmethod
    def load(
        self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None
    ) -> MeterReadingsByFrequency:
        """Return meter readings for the given PCE and date range, grouped by frequency.

        When frequencies is None, implementations return every supported frequency.
        """
52
+
53
+
54
+ # ------------------------------------------------------------------------------------------------------------
55
# ------------------------------------------------------------------------------------------------------------
class WebDataSource(IDataSource):  # pylint: disable=too-few-public-methods
    """Base class for data sources that authenticate against the GRDF web site.

    Subclasses implement _loadFromSession() and use self._session, the
    authenticated requests Session created by _login().
    """

    # ------------------------------------------------------
    def __init__(self, username: str, password: str):

        self.__username = username
        self.__password = password
        # Authenticated session, created by _login() before any data is loaded.
        self._session: Optional[Session] = None

    # ------------------------------------------------------
    def load(
        self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None
    ) -> MeterReadingsByFrequency:
        """Authenticate, then delegate the actual loading to the subclass."""

        self._login(self.__username, self.__password)  # We ignore the return value.

        res = self._loadFromSession(pceIdentifier, startDate, endDate, frequencies)

        Logger.debug("The data update terminates normally")

        return res

    # ------------------------------------------------------
    def _login(self, username: str, password: str) -> str:
        """Authenticate against GRDF, populate self._session and return the auth token.

        Raises:
            ValueError: when either authentication request does not answer 200.
        """

        session = Session()
        session.headers.update({"domain": "grdf.fr"})
        session.headers.update({"Content-Type": "application/json"})
        session.headers.update({"X-Requested-With": "XMLHttpRequest"})

        # Build the payload with json.dumps so credentials containing quotes or
        # backslashes are escaped correctly (a str.format template would emit
        # invalid JSON for such values).
        payload = json.dumps(
            {
                "username": username,
                "password": password,
                "options": {
                    "multiOptionalFactorEnroll": "false",
                    "warnBeforePasswordExpired": "false",
                },
            }
        )

        response = session.post(SESSION_TOKEN_URL, data=payload)

        if response.status_code != 200:
            raise ValueError(
                f"An error occurred while logging in. Status code: {response.status_code} - {response.text}"
            )

        session_token = response.json().get("sessionToken")

        Logger.debug("Session token: %s", session_token)

        jar = http.cookiejar.CookieJar()

        self._session = Session()
        self._session.headers.update({"Content-Type": "application/json"})
        self._session.headers.update({"X-Requested-With": "XMLHttpRequest"})

        # Exchange the session token for the authenticated cookies.
        params = {
            "checkAccountSetupComplete": "true",
            "token": session_token,
            "redirectUrl": "https://monespace.grdf.fr",
        }

        response = self._session.get(AUTH_TOKEN_URL, params=params, allow_redirects=True, cookies=jar)  # type: ignore

        if response.status_code != 200:
            raise ValueError(
                f"An error occurred while getting the auth token. Status code: {response.status_code} - {response.text}"
            )

        auth_token = self._session.cookies.get("auth_token", domain="monespace.grdf.fr")

        return auth_token  # type: ignore

    @abstractmethod
    def _loadFromSession(
        self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None
    ) -> MeterReadingsByFrequency:
        """Load the readings using the already-authenticated self._session."""
121
+
122
+
123
+ # ------------------------------------------------------------------------------------------------------------
124
# ------------------------------------------------------------------------------------------------------------
class ExcelWebDataSource(WebDataSource):  # pylint: disable=too-few-public-methods
    """Web data source that downloads the GRDF informative Excel export and parses it."""

    # Download endpoint. Placeholders: {0}=start date, {1}=end date, {2}=PCE id, {3}=frequency label.
    DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives/telecharger?dateDebut={0}&dateFin={1}&frequence={3}&pceList[]={2}"

    # Date format expected by the API in query parameters.
    DATE_FORMAT = "%Y-%m-%d"

    # Frequency labels understood by the API. YEARLY deliberately maps to the daily
    # export: yearly figures are computed locally from daily data (see _loadFromSession).
    FREQUENCY_VALUES = {
        Frequency.HOURLY: "Horaire",
        Frequency.DAILY: "Journalier",
        Frequency.WEEKLY: "Hebdomadaire",
        Frequency.MONTHLY: "Mensuel",
        Frequency.YEARLY: "Journalier",
    }

    # Glob pattern of the downloaded workbook inside the tmp directory.
    DATA_FILENAME = "Donnees_informatives_*.xlsx"

    # ------------------------------------------------------
    def __init__(self, username: str, password: str, tmpDirectory: str):
        """tmpDirectory: writable directory where the Excel files are downloaded."""

        super().__init__(username, password)

        self.__tmpDirectory = tmpDirectory

    # ------------------------------------------------------
    def _loadFromSession(  # pylint: disable=too-many-branches
        self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None
    ) -> MeterReadingsByFrequency:
        """Download one Excel file per requested frequency and parse it into readings."""

        res = {}

        # XLSX is in the TMP directory.
        data_file_path_pattern = self.__tmpDirectory + "/" + ExcelWebDataSource.DATA_FILENAME

        # Remove any leftover data file from a previous run that did not delete it.
        for filename in glob.glob(data_file_path_pattern):
            if os.path.isfile(filename):
                try:
                    os.remove(filename)
                except PermissionError:
                    # Best effort: a locked leftover will simply be re-parsed below.
                    pass

        if frequencies is None:
            # All supported frequencies by default.
            frequencyList = list(Frequency)
        else:
            # Get distinct values.
            frequencyList = list(set(frequencies))

        for frequency in frequencyList:
            # Inject parameters.
            downloadUrl = ExcelWebDataSource.DATA_URL.format(
                startDate.strftime(ExcelWebDataSource.DATE_FORMAT),
                endDate.strftime(ExcelWebDataSource.DATE_FORMAT),
                pceIdentifier,
                ExcelWebDataSource.FREQUENCY_VALUES[frequency],
            )

            Logger.debug(
                "Loading data of frequency %s from %s to %s",
                ExcelWebDataSource.FREQUENCY_VALUES[frequency],
                startDate.strftime(ExcelWebDataSource.DATE_FORMAT),
                endDate.strftime(ExcelWebDataSource.DATE_FORMAT),
            )

            # Retry mechanism: transient GRDF failures are frequent.
            retry = 10
            while retry > 0:
                try:
                    self.__downloadFile(self._session, downloadUrl, self.__tmpDirectory)
                    break
                except Exception as e:  # pylint: disable=broad-exception-caught
                    if retry == 1:
                        raise e
                    Logger.error("An error occurred while loading data. Retry in 3 seconds.")
                    time.sleep(3)
                    retry -= 1

            # Load the XLSX file into the data structure.
            file_list = glob.glob(data_file_path_pattern)

            if len(file_list) == 0:
                Logger.warning("Not any data file has been found in '%s' directory", self.__tmpDirectory)

            for filename in file_list:
                # YEARLY downloads use the daily sheet; parse it as DAILY.
                res[frequency.value] = ExcelParser.parse(
                    filename, frequency if frequency != Frequency.YEARLY else Frequency.DAILY
                )
                try:
                    # openpyxl does not close the file properly.
                    os.remove(filename)
                except PermissionError:
                    pass

            # We compute yearly from daily data.
            if frequency == Frequency.YEARLY:
                res[frequency.value] = FrequencyConverter.computeYearly(res[frequency.value])

        return res

    # ------------------------------------------------------
    def __downloadFile(self, session: Session, url: str, path: str):
        """Download one Excel file from url into the path directory.

        Raises:
            ValueError: when the server answers HTML (invalid session) or a non-200 status.
        """

        response = session.get(url)

        # An HTML answer means we were redirected to a login page instead of a file.
        # headers.get() may return None, hence the "or ''" guard.
        if "text/html" in (response.headers.get("Content-Type") or ""):
            raise ValueError("An error occurred while loading data. Please check your credentials.")

        if response.status_code != 200:
            raise ValueError(
                f"An error occurred while loading data. Status code: {response.status_code} - {response.text}"
            )

        response.raise_for_status()

        # Save the file under the name advertised by the server so that the
        # DATA_FILENAME glob pattern finds it afterwards.
        # NOTE(review): the header value is used verbatim — assumes GRDF sends a plain,
        # unquoted filename with no path components; confirm against a live response.
        filename = response.headers["Content-Disposition"].split("filename=")[1]

        with open(f"{path}/{filename}", "wb") as file:
            file.write(response.content)
243
+
244
+
245
+ # ------------------------------------------------------------------------------------------------------------
246
# ------------------------------------------------------------------------------------------------------------
class ExcelFileDataSource(IDataSource):  # pylint: disable=too-few-public-methods
    """Data source that reads meter readings from a local GRDF informative Excel file."""

    def __init__(self, excelFile: str):
        """excelFile: path to the Excel workbook previously downloaded from GRDF."""

        self.__excelFile = excelFile

    def load(
        self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None
    ) -> MeterReadingsByFrequency:
        """Parse the workbook and return readings grouped by frequency."""

        # All frequencies by default, otherwise the distinct requested ones.
        requested = list(Frequency) if frequencies is None else list(set(frequencies))

        res = {}
        for frequency in requested:
            if frequency == Frequency.YEARLY:
                # No yearly sheet exists: aggregate the daily readings instead.
                daily = ExcelParser.parse(self.__excelFile, Frequency.DAILY)
                res[frequency.value] = FrequencyConverter.computeYearly(daily)
            else:
                res[frequency.value] = ExcelParser.parse(self.__excelFile, frequency)

        return res
273
+
274
+
275
+ # ------------------------------------------------------------------------------------------------------------
276
# ------------------------------------------------------------------------------------------------------------
class JsonWebDataSource(WebDataSource):  # pylint: disable=too-few-public-methods
    """Web data source that uses the GRDF JSON API (consumption + weather endpoints)."""

    # Consumption endpoint: daily informative readings for one PCE over a date range.
    DATA_URL = (
        "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives?dateDebut={0}&dateFin={1}&pceList[]={2}"
    )

    # Weather endpoint: temperatures for one PCE, nbJours back from dateFinPeriode.
    TEMPERATURES_URL = "https://monespace.grdf.fr/api/e-conso/pce/{0}/meteo?dateFinPeriode={1}&nbJours={2}"

    # Date format expected by the API in query parameters.
    INPUT_DATE_FORMAT = "%Y-%m-%d"

    # Date format used in the readings returned to the caller.
    OUTPUT_DATE_FORMAT = "%d/%m/%Y"

    def _loadFromSession(
        self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None
    ) -> MeterReadingsByFrequency:
        """Download daily data and temperatures, then derive every requested frequency.

        Raises:
            ValueError: when the consumption request keeps failing after all retries.
        """

        res = {}

        # Each frequency is derived from the daily readings by its converter.
        computeByFrequency = {
            Frequency.HOURLY: FrequencyConverter.computeHourly,
            Frequency.DAILY: FrequencyConverter.computeDaily,
            Frequency.WEEKLY: FrequencyConverter.computeWeekly,
            Frequency.MONTHLY: FrequencyConverter.computeMonthly,
            Frequency.YEARLY: FrequencyConverter.computeYearly,
        }

        # Data URL: Inject parameters.
        downloadUrl = JsonWebDataSource.DATA_URL.format(
            startDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT),
            endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT),
            pceIdentifier,
        )

        # Retry mechanism: transient GRDF failures are frequent.
        retry = 10
        while retry > 0:

            try:
                response = self._session.get(downloadUrl)

                # An HTML answer means we were redirected to a login page (invalid session).
                # headers.get() may return None, hence the "or ''" guard.
                if "text/html" in (response.headers.get("Content-Type") or ""):
                    raise ValueError("An error occurred while loading data. Please check your credentials.")

                if response.status_code != 200:
                    raise ValueError(
                        f"An error occurred while loading data. Status code: {response.status_code} - {response.text}"
                    )

                break
            except Exception as e:  # pylint: disable=broad-exception-caught

                if retry == 1:
                    raise e

                Logger.error("An error occurred while loading data. Retry in 3 seconds.")
                time.sleep(3)
                retry -= 1

        data = response.text

        Logger.debug("Json meter data: %s", data)

        # Temperatures URL: the end date may not be in the future, and nbJours is
        # clamped to 730 (presumably the API maximum — confirm).
        endDate = date.today() - timedelta(days=1) if endDate >= date.today() else endDate
        days = min((endDate - startDate).days, 730)
        temperaturesUrl = JsonWebDataSource.TEMPERATURES_URL.format(
            pceIdentifier, endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), days
        )

        # Get weather data.
        # NOTE(review): this response is not checked for errors; a failure body goes
        # straight to JsonParser — confirm the parser tolerates it.
        temperatures = self._session.get(temperaturesUrl).text

        Logger.debug("Json temperature data: %s", temperatures)

        # Transform all the data into the target structure.
        daily = JsonParser.parse(data, temperatures, pceIdentifier)

        Logger.debug("Processed daily data: %s", daily)

        if frequencies is None:
            # All supported frequencies by default.
            frequencyList = list(Frequency)
        else:
            # Get unique values.
            frequencyList = list(set(frequencies))

        for frequency in frequencyList:
            res[frequency.value] = computeByFrequency[frequency](daily)

        return res
366
+
367
+
368
+ # ------------------------------------------------------------------------------------------------------------
369
# ------------------------------------------------------------------------------------------------------------
class JsonFileDataSource(IDataSource):  # pylint: disable=too-few-public-methods
    """Data source that reads consumption and temperature data from local JSON files."""

    def __init__(self, consumptionJsonFile: str, temperatureJsonFile: str):
        """Store the paths of the consumption and temperature JSON files."""

        self.__consumptionJsonFile = consumptionJsonFile
        self.__temperatureJsonFile = temperatureJsonFile

    def load(
        self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None
    ) -> MeterReadingsByFrequency:
        """Parse both files and return readings grouped by frequency.

        startDate and endDate are part of the IDataSource contract but are not
        used here: the whole file content is processed.
        """

        res = {}

        # Read both files within a single with-statement so they always close together.
        with open(self.__consumptionJsonFile, mode="r", encoding="utf-8") as consumptionJsonFile, open(
            self.__temperatureJsonFile, mode="r", encoding="utf-8"
        ) as temperatureJsonFile:
            daily = JsonParser.parse(consumptionJsonFile.read(), temperatureJsonFile.read(), pceIdentifier)

        # Each frequency is derived from the daily readings by its converter.
        computeByFrequency = {
            Frequency.HOURLY: FrequencyConverter.computeHourly,
            Frequency.DAILY: FrequencyConverter.computeDaily,
            Frequency.WEEKLY: FrequencyConverter.computeWeekly,
            Frequency.MONTHLY: FrequencyConverter.computeMonthly,
            Frequency.YEARLY: FrequencyConverter.computeYearly,
        }

        if frequencies is None:
            # All supported frequencies by default.
            frequencyList = list(Frequency)
        else:
            # Get unique values.
            frequencyList = list(set(frequencies))

        for frequency in frequencyList:
            res[frequency.value] = computeByFrequency[frequency](daily)

        return res
405
+
406
+
407
+ # ------------------------------------------------------------------------------------------------------------
408
# ------------------------------------------------------------------------------------------------------------
class TestDataSource(IDataSource):  # pylint: disable=too-few-public-methods
    """Data source that serves the bundled sample files, for tests and demos."""

    __test__ = False  # Will not be discovered as a test

    def __init__(self):
        """Nothing to configure: the sample files ship with the package."""

    def load(
        self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None
    ) -> MeterReadingsByFrequency:
        """Return the packaged sample readings for each requested frequency."""

        # One sample file per supported frequency, under pygazpar/resources.
        sampleFileByFrequency = {
            Frequency.HOURLY: "hourly_data_sample.json",
            Frequency.DAILY: "daily_data_sample.json",
            Frequency.WEEKLY: "weekly_data_sample.json",
            Frequency.MONTHLY: "monthly_data_sample.json",
            Frequency.YEARLY: "yearly_data_sample.json",
        }

        # All frequencies by default, otherwise the distinct requested ones.
        requested = list(Frequency) if frequencies is None else list(set(frequencies))

        resourceDirectory = f"{os.path.dirname(os.path.abspath(__file__))}/resources"

        res = dict[str, Any]()
        for frequency in requested:
            samplePath = f"{resourceDirectory}/{sampleFileByFrequency[frequency]}"
            with open(samplePath, mode="r", encoding="utf-8") as jsonFile:
                res[frequency.value] = cast(List[Dict[PropertyName, Any]], json.load(jsonFile))

        return res
446
+
447
+
448
+ # ------------------------------------------------------------------------------------------------------------
449
# ------------------------------------------------------------------------------------------------------------
class FrequencyConverter:
    """Aggregates daily meter readings into weekly, monthly and yearly readings.

    Input/output rows are dicts with at least the keys: time_period,
    start_index_m3, end_index_m3, volume_m3, energy_kwh, timestamp.
    """

    # French month names, indexed by month number - 1; used to build monthly period labels.
    MONTHS = [
        "Janvier",
        "Février",
        "Mars",
        "Avril",
        "Mai",
        "Juin",
        "Juillet",
        "Août",
        "Septembre",
        "Octobre",
        "Novembre",
        "Décembre",
    ]

    # ------------------------------------------------------
    @staticmethod
    def computeHourly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:  # pylint: disable=unused-argument
        """Hourly readings cannot be derived from daily data: always returns an empty list."""

        return []

    # ------------------------------------------------------
    @staticmethod
    def computeDaily(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Daily readings are returned unchanged."""

        return daily

    # ------------------------------------------------------
    @staticmethod
    def computeWeekly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate daily readings into one reading per calendar week."""

        df = pd.DataFrame(daily)

        # Trimming head and trailing spaces and convert to datetime.
        df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)

        # Get the first day of week via a strftime/strptime round-trip on the
        # "%W" week number (weeks start on Monday, "%w"=1).
        df["first_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 1"), format="%W %Y %w")

        # Get the last day of week ("%w"=0, i.e. the following Sunday).
        df["last_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 0"), format="%W %Y %w")

        # Reformat the time period as "Du <monday> au <sunday>".
        df["time_period"] = (
            "Du "
            + df["first_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str)
            + " au "
            + df["last_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str)
        )

        # Aggregate rows by week period: indexes take min/max, quantities are summed,
        # and "count" records how many daily rows fell into the week.
        df = (
            df[
                [
                    "first_day_of_week",
                    "time_period",
                    "start_index_m3",
                    "end_index_m3",
                    "volume_m3",
                    "energy_kwh",
                    "timestamp",
                ]
            ]
            .groupby("time_period")
            .agg(
                first_day_of_week=("first_day_of_week", "min"),
                start_index_m3=("start_index_m3", "min"),
                end_index_m3=("end_index_m3", "max"),
                volume_m3=("volume_m3", "sum"),
                energy_kwh=("energy_kwh", "sum"),
                timestamp=("timestamp", "min"),
                count=("energy_kwh", "count"),
            )
            .reset_index()
        )

        # Sort rows by week ascending.
        df = df.sort_values(by=["first_day_of_week"])

        # Select rows where we have a full week (7 days) except for the current week:
        # the concat keeps complete weeks plus, if incomplete, the last (ongoing) one.
        df = pd.concat([df[(df["count"] >= 7)], df.tail(1)[df.tail(1)["count"] < 7]])

        # Select target columns.
        df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]

        res = cast(List[Dict[str, Any]], df.to_dict("records"))

        return res

    # ------------------------------------------------------
    @staticmethod
    def computeMonthly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate daily readings into one reading per calendar month."""

        df = pd.DataFrame(daily)

        # Trimming head and trailing spaces and convert to datetime.
        df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)

        # Get the corresponding month-year label, e.g. "Janvier 2024".
        df["month_year"] = (
            df["date_time"].apply(lambda x: FrequencyConverter.MONTHS[x.month - 1]).astype(str)
            + " "
            + df["date_time"].dt.strftime("%Y").astype(str)
        )

        # Aggregate rows by month_year: indexes take min/max, quantities are summed,
        # and "count" records how many daily rows fell into the month.
        df = (
            df[["date_time", "month_year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]
            .groupby("month_year")
            .agg(
                first_day_of_month=("date_time", "min"),
                start_index_m3=("start_index_m3", "min"),
                end_index_m3=("end_index_m3", "max"),
                volume_m3=("volume_m3", "sum"),
                energy_kwh=("energy_kwh", "sum"),
                timestamp=("timestamp", "min"),
                count=("energy_kwh", "count"),
            )
            .reset_index()
        )

        # Sort rows by month ascending.
        df = df.sort_values(by=["first_day_of_month"])

        # Select rows where we have a full month (more than 27 days) except for the
        # current month: complete months plus, if incomplete, the last (ongoing) one.
        df = pd.concat([df[(df["count"] >= 28)], df.tail(1)[df.tail(1)["count"] < 28]])

        # Rename columns for their target names.
        df = df.rename(columns={"month_year": "time_period"})

        # Select target columns.
        df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]

        res = cast(List[Dict[str, Any]], df.to_dict("records"))

        return res

    # ------------------------------------------------------
    @staticmethod
    def computeYearly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate daily readings into one reading per calendar year."""

        df = pd.DataFrame(daily)

        # Trimming head and trailing spaces and convert to datetime.
        df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)

        # Get the corresponding year.
        df["year"] = df["date_time"].dt.strftime("%Y")

        # Aggregate rows by year: indexes take min/max, quantities are summed,
        # and "count" records how many daily rows fell into the year.
        df = (
            df[["year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]
            .groupby("year")
            .agg(
                start_index_m3=("start_index_m3", "min"),
                end_index_m3=("end_index_m3", "max"),
                volume_m3=("volume_m3", "sum"),
                energy_kwh=("energy_kwh", "sum"),
                timestamp=("timestamp", "min"),
                count=("energy_kwh", "count"),
            )
            .reset_index()
        )

        # Sort rows by year ascending.
        df = df.sort_values(by=["year"])

        # Select rows where we have almost a full year (more than 360 days) except for
        # the current year: complete years plus, if incomplete, the last (ongoing) one.
        df = pd.concat([df[(df["count"] >= 360)], df.tail(1)[df.tail(1)["count"] < 360]])

        # Rename columns for their target names.
        df = df.rename(columns={"year": "time_period"})

        # Select target columns.
        df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]

        res = cast(List[Dict[str, Any]], df.to_dict("records"))

        return res
pygazpar/enum.py CHANGED
@@ -1,12 +1,35 @@
1
1
  from enum import Enum
2
2
 
3
- class PropertyNameEnum(Enum):
4
- DATE = "date"
5
- START_INDEX_M3 = "start_index_m3"
6
- END_INDEX_M3 = "end_index_m3"
7
- VOLUME_M3 = "volume_m3"
8
- ENERGY_KWH = "energy_kwh"
9
- CONVERTER_FACTOR = "converter_factor"
10
- LOCAL_TEMPERATURE = "local_temperature"
3
+
4
+ # ------------------------------------------------------------------------------------------------------------
5
# ------------------------------------------------------------------------------------------------------------
class PropertyName(Enum):
    """Keys of a meter-reading dictionary; each value is the serialized column name."""

    TIME_PERIOD = "time_period"
    START_INDEX = "start_index_m3"
    END_INDEX = "end_index_m3"
    VOLUME = "volume_m3"
    ENERGY = "energy_kwh"
    CONVERTER_FACTOR = "converter_factor_kwh/m3"
    TEMPERATURE = "temperature_degC"
    TYPE = "type"
    TIMESTAMP = "timestamp"

    def __str__(self):
        """Render as the raw column name."""
        return self.value

    def __repr__(self):
        """Match str() so readings print cleanly inside collections."""
        return str(self)
21
+
22
+
23
+ # ------------------------------------------------------------------------------------------------------------
24
class Frequency(Enum):
    """Supported reading frequencies; each value is the lowercase frequency name."""

    HOURLY = "hourly"
    DAILY = "daily"
    WEEKLY = "weekly"
    MONTHLY = "monthly"
    YEARLY = "yearly"

    def __str__(self):
        """Render as the lowercase frequency name."""
        return self.value

    def __repr__(self):
        """Match str() so frequencies print cleanly inside collections."""
        return str(self)