pygazpar 1.2.7__py312-none-any.whl → 1.2.8__py312-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pygazpar/datasource.py CHANGED
@@ -1,522 +1,528 @@
1
- import logging
2
- import glob
3
- import os
4
- import json
5
- import time
6
- import pandas as pd
7
- import http.cookiejar
8
- from abc import ABC, abstractmethod
9
- from typing import Any, List, Dict, cast, Optional
10
- from requests import Session
11
- from datetime import date, timedelta
12
- from pygazpar.enum import Frequency, PropertyName
13
- from pygazpar.excelparser import ExcelParser
14
- from pygazpar.jsonparser import JsonParser
15
-
16
- SESSION_TOKEN_URL = "https://connexion.grdf.fr/api/v1/authn"
17
- SESSION_TOKEN_PAYLOAD = """{{
18
- "username": "{0}",
19
- "password": "{1}",
20
- "options": {{
21
- "multiOptionalFactorEnroll": "false",
22
- "warnBeforePasswordExpired": "false"
23
- }}
24
- }}"""
25
-
26
- AUTH_TOKEN_URL = "https://connexion.grdf.fr/login/sessionCookieRedirect"
27
- AUTH_TOKEN_PARAMS = """{{
28
- "checkAccountSetupComplete": "true",
29
- "token": "{0}",
30
- "redirectUrl": "https://monespace.grdf.fr"
31
- }}"""
32
-
33
- Logger = logging.getLogger(__name__)
34
-
35
- MeterReading = Dict[str, Any]
36
-
37
- MeterReadings = List[MeterReading]
38
-
39
- MeterReadingsByFrequency = Dict[str, MeterReadings]
40
-
41
-
42
- # ------------------------------------------------------------------------------------------------------------
43
- class IDataSource(ABC):
44
-
45
- @abstractmethod
46
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
47
- pass
48
-
49
-
50
- # ------------------------------------------------------------------------------------------------------------
51
- class WebDataSource(IDataSource):
52
-
53
- # ------------------------------------------------------
54
- def __init__(self, username: str, password: str):
55
-
56
- self.__username = username
57
- self.__password = password
58
-
59
- # ------------------------------------------------------
60
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
61
-
62
- self._login(self.__username, self.__password) # We ignore the return value.
63
-
64
- res = self._loadFromSession(pceIdentifier, startDate, endDate, frequencies)
65
-
66
- Logger.debug("The data update terminates normally")
67
-
68
- return res
69
-
70
- # ------------------------------------------------------
71
- def _login(self, username: str, password: str) -> str:
72
-
73
- session = Session()
74
- session.headers.update({"domain": "grdf.fr"})
75
- session.headers.update({"Content-Type": "application/json"})
76
- session.headers.update({"X-Requested-With": "XMLHttpRequest"})
77
-
78
- payload = SESSION_TOKEN_PAYLOAD.format(username, password)
79
-
80
- response = session.post(SESSION_TOKEN_URL, data=payload)
81
-
82
- if response.status_code != 200:
83
- raise Exception(f"An error occurred while logging in. Status code: {response.status_code} - {response.text}")
84
-
85
- session_token = response.json().get("sessionToken")
86
-
87
- Logger.debug("Session token: %s", session_token)
88
-
89
- jar = http.cookiejar.CookieJar()
90
-
91
- self._session = Session()
92
- self._session.headers.update({"Content-Type": "application/json"})
93
- self._session.headers.update({"X-Requested-With": "XMLHttpRequest"})
94
-
95
- params = json.loads(AUTH_TOKEN_PARAMS.format(session_token))
96
-
97
- response = self._session.get(AUTH_TOKEN_URL, params=params, allow_redirects=True, cookies=jar) # type: ignore
98
-
99
- if response.status_code != 200:
100
- raise Exception(f"An error occurred while getting the auth token. Status code: {response.status_code} - {response.text}")
101
-
102
- auth_token = self._session.cookies.get("auth_token", domain="monespace.grdf.fr")
103
-
104
- return auth_token # type: ignore
105
-
106
- @abstractmethod
107
- def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
108
- pass
109
-
110
-
111
- # ------------------------------------------------------------------------------------------------------------
112
- class ExcelWebDataSource(WebDataSource):
113
-
114
- DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives/telecharger?dateDebut={0}&dateFin={1}&frequence={3}&pceList[]={2}"
115
-
116
- DATE_FORMAT = "%Y-%m-%d"
117
-
118
- FREQUENCY_VALUES = {
119
- Frequency.HOURLY: "Horaire",
120
- Frequency.DAILY: "Journalier",
121
- Frequency.WEEKLY: "Hebdomadaire",
122
- Frequency.MONTHLY: "Mensuel",
123
- Frequency.YEARLY: "Journalier"
124
- }
125
-
126
- DATA_FILENAME = 'Donnees_informatives_*.xlsx'
127
-
128
- # ------------------------------------------------------
129
- def __init__(self, username: str, password: str, tmpDirectory: str):
130
-
131
- super().__init__(username, password)
132
-
133
- self.__tmpDirectory = tmpDirectory
134
-
135
- # ------------------------------------------------------
136
- def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
137
-
138
- res = {}
139
-
140
- # XLSX is in the TMP directory
141
- data_file_path_pattern = self.__tmpDirectory + '/' + ExcelWebDataSource.DATA_FILENAME
142
-
143
- # Remove any leftover data file (from a previous run that did not delete it).
144
- file_list = glob.glob(data_file_path_pattern)
145
- for filename in file_list:
146
- if os.path.isfile(filename):
147
- try:
148
- os.remove(filename)
149
- except PermissionError:
150
- pass
151
-
152
- if frequencies is None:
153
- # Transform Enum in List.
154
- frequencyList = [frequency for frequency in Frequency]
155
- else:
156
- # Get unique values.
157
- frequencyList = set(frequencies)
158
-
159
- for frequency in frequencyList:
160
- # Inject parameters.
161
- downloadUrl = ExcelWebDataSource.DATA_URL.format(startDate.strftime(ExcelWebDataSource.DATE_FORMAT), endDate.strftime(ExcelWebDataSource.DATE_FORMAT), pceIdentifier, ExcelWebDataSource.FREQUENCY_VALUES[frequency])
162
-
163
- Logger.debug(f"Loading data of frequency {ExcelWebDataSource.FREQUENCY_VALUES[frequency]} from {startDate.strftime(ExcelWebDataSource.DATE_FORMAT)} to {endDate.strftime(ExcelWebDataSource.DATE_FORMAT)}")
164
-
165
- # Retry mechanism.
166
- retry = 10
167
- while retry > 0:
168
-
169
- try:
170
- self.__downloadFile(self._session, downloadUrl, self.__tmpDirectory)
171
- break
172
- except Exception as e:
173
-
174
- if retry == 1:
175
- raise e
176
-
177
- Logger.error("An error occurred while loading data. Retry in 3 seconds.")
178
- time.sleep(3)
179
- retry -= 1
180
-
181
- # Load the XLSX file into the data structure
182
- file_list = glob.glob(data_file_path_pattern)
183
-
184
- if len(file_list) == 0:
185
- Logger.warning(f"Not any data file has been found in '{self.__tmpDirectory}' directory")
186
-
187
- for filename in file_list:
188
- res[frequency.value] = ExcelParser.parse(filename, frequency if frequency != Frequency.YEARLY else Frequency.DAILY)
189
- try:
190
- # openpyxl does not close the file properly.
191
- os.remove(filename)
192
- except PermissionError:
193
- pass
194
-
195
- # We compute yearly from daily data.
196
- if frequency == Frequency.YEARLY:
197
- res[frequency.value] = FrequencyConverter.computeYearly(res[frequency.value])
198
-
199
- return res
200
-
201
- # ------------------------------------------------------
202
- def __downloadFile(self, session: Session, url: str, path: str):
203
-
204
- response = session.get(url)
205
-
206
- if "text/html" in response.headers.get("Content-Type"): # type: ignore
207
- raise Exception("An error occurred while loading data. Please check your credentials.")
208
-
209
- if response.status_code != 200:
210
- raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")
211
-
212
- response.raise_for_status()
213
-
214
- filename = response.headers["Content-Disposition"].split("filename=")[1]
215
-
216
- open(f"{path}/{filename}", "wb").write(response.content)
217
-
218
-
219
- # ------------------------------------------------------------------------------------------------------------
220
- class ExcelFileDataSource(IDataSource):
221
-
222
- def __init__(self, excelFile: str):
223
-
224
- self.__excelFile = excelFile
225
-
226
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
227
-
228
- res = {}
229
-
230
- if frequencies is None:
231
- # Transform Enum in List.
232
- frequencyList = [frequency for frequency in Frequency]
233
- else:
234
- # Get unique values.
235
- frequencyList = set(frequencies)
236
-
237
- for frequency in frequencyList:
238
- if frequency != Frequency.YEARLY:
239
- res[frequency.value] = ExcelParser.parse(self.__excelFile, frequency)
240
- else:
241
- daily = ExcelParser.parse(self.__excelFile, Frequency.DAILY)
242
- res[frequency.value] = FrequencyConverter.computeYearly(daily)
243
-
244
- return res
245
-
246
-
247
- # ------------------------------------------------------------------------------------------------------------
248
- class JsonWebDataSource(WebDataSource):
249
-
250
- DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives?dateDebut={0}&dateFin={1}&pceList[]={2}"
251
-
252
- TEMPERATURES_URL = "https://monespace.grdf.fr/api/e-conso/pce/{0}/meteo?dateFinPeriode={1}&nbJours={2}"
253
-
254
- INPUT_DATE_FORMAT = "%Y-%m-%d"
255
-
256
- OUTPUT_DATE_FORMAT = "%d/%m/%Y"
257
-
258
- def __init__(self, username: str, password: str):
259
-
260
- super().__init__(username, password)
261
-
262
- def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
263
-
264
- res = {}
265
-
266
- computeByFrequency = {
267
- Frequency.HOURLY: FrequencyConverter.computeHourly,
268
- Frequency.DAILY: FrequencyConverter.computeDaily,
269
- Frequency.WEEKLY: FrequencyConverter.computeWeekly,
270
- Frequency.MONTHLY: FrequencyConverter.computeMonthly,
271
- Frequency.YEARLY: FrequencyConverter.computeYearly
272
- }
273
-
274
- # Data URL: Inject parameters.
275
- downloadUrl = JsonWebDataSource.DATA_URL.format(startDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), pceIdentifier)
276
-
277
- # Retry mechanism.
278
- retry = 10
279
- while retry > 0:
280
-
281
- try:
282
- response = self._session.get(downloadUrl)
283
-
284
- if "text/html" in response.headers.get("Content-Type"): # type: ignore
285
- raise Exception("An error occurred while loading data. Please check your credentials.")
286
-
287
- if response.status_code != 200:
288
- raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")
289
-
290
- break
291
- except Exception as e:
292
-
293
- if retry == 1:
294
- raise e
295
-
296
- Logger.error("An error occurred while loading data. Retry in 3 seconds.")
297
- time.sleep(3)
298
- retry -= 1
299
-
300
- data = response.text
301
-
302
- # Temperatures URL: Inject parameters.
303
- endDate = date.today() - timedelta(days=1) if endDate >= date.today() else endDate
304
- days = min((endDate - startDate).days, 730)
305
- temperaturesUrl = JsonWebDataSource.TEMPERATURES_URL.format(pceIdentifier, endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), days)
306
-
307
- # Get weather data.
308
- temperatures = self._session.get(temperaturesUrl).text
309
-
310
- # Transform all the data into the target structure.
311
- daily = JsonParser.parse(data, temperatures, pceIdentifier)
312
-
313
- if frequencies is None:
314
- # Transform Enum in List.
315
- frequencyList = [frequency for frequency in Frequency]
316
- else:
317
- # Get unique values.
318
- frequencyList = set(frequencies)
319
-
320
- for frequency in frequencyList:
321
- res[frequency.value] = computeByFrequency[frequency](daily)
322
-
323
- return res
324
-
325
-
326
- # ------------------------------------------------------------------------------------------------------------
327
- class JsonFileDataSource(IDataSource):
328
-
329
- def __init__(self, consumptionJsonFile: str, temperatureJsonFile):
330
-
331
- self.__consumptionJsonFile = consumptionJsonFile
332
- self.__temperatureJsonFile = temperatureJsonFile
333
-
334
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
335
-
336
- res = {}
337
-
338
- with open(self.__consumptionJsonFile) as consumptionJsonFile:
339
- with open(self.__temperatureJsonFile) as temperatureJsonFile:
340
- daily = JsonParser.parse(consumptionJsonFile.read(), temperatureJsonFile.read(), pceIdentifier)
341
-
342
- computeByFrequency = {
343
- Frequency.HOURLY: FrequencyConverter.computeHourly,
344
- Frequency.DAILY: FrequencyConverter.computeDaily,
345
- Frequency.WEEKLY: FrequencyConverter.computeWeekly,
346
- Frequency.MONTHLY: FrequencyConverter.computeMonthly,
347
- Frequency.YEARLY: FrequencyConverter.computeYearly
348
- }
349
-
350
- if frequencies is None:
351
- # Transform Enum in List.
352
- frequencyList = [frequency for frequency in Frequency]
353
- else:
354
- # Get unique values.
355
- frequencyList = set(frequencies)
356
-
357
- for frequency in frequencyList:
358
- res[frequency.value] = computeByFrequency[frequency](daily)
359
-
360
- return res
361
-
362
-
363
- # ------------------------------------------------------------------------------------------------------------
364
- class TestDataSource(IDataSource):
365
-
366
- def __init__(self):
367
-
368
- pass
369
-
370
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
371
-
372
- res = {}
373
-
374
- dataSampleFilenameByFrequency = {
375
- Frequency.HOURLY: "hourly_data_sample.json",
376
- Frequency.DAILY: "daily_data_sample.json",
377
- Frequency.WEEKLY: "weekly_data_sample.json",
378
- Frequency.MONTHLY: "monthly_data_sample.json",
379
- Frequency.YEARLY: "yearly_data_sample.json"
380
- }
381
-
382
- if frequencies is None:
383
- # Transform Enum in List.
384
- frequencyList = [frequency for frequency in Frequency]
385
- else:
386
- # Get unique values.
387
- frequencyList = set(frequencies)
388
-
389
- for frequency in frequencyList:
390
- dataSampleFilename = f"{os.path.dirname(os.path.abspath(__file__))}/resources/{dataSampleFilenameByFrequency[frequency]}"
391
-
392
- with open(dataSampleFilename) as jsonFile:
393
- res[frequency.value] = cast(List[Dict[PropertyName, Any]], json.load(jsonFile))
394
-
395
- return res
396
-
397
-
398
- # ------------------------------------------------------------------------------------------------------------
399
- class FrequencyConverter:
400
-
401
- MONTHS = [
402
- "Janvier",
403
- "Février",
404
- "Mars",
405
- "Avril",
406
- "Mai",
407
- "Juin",
408
- "Juillet",
409
- "Août",
410
- "Septembre",
411
- "Octobre",
412
- "Novembre",
413
- "Décembre"
414
- ]
415
-
416
- # ------------------------------------------------------
417
- @staticmethod
418
- def computeHourly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
419
-
420
- return []
421
-
422
- # ------------------------------------------------------
423
- @staticmethod
424
- def computeDaily(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
425
-
426
- return daily
427
-
428
- # ------------------------------------------------------
429
- @staticmethod
430
- def computeWeekly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
431
-
432
- df = pd.DataFrame(daily)
433
-
434
- # Trimming head and trailing spaces and convert to datetime.
435
- df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)
436
-
437
- # Get the first day of week.
438
- df["first_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 1"), format="%W %Y %w")
439
-
440
- # Get the last day of week.
441
- df["last_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 0"), format="%W %Y %w")
442
-
443
- # Reformat the time period.
444
- df["time_period"] = "Du " + df["first_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str) + " au " + df["last_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str)
445
-
446
- # Aggregate rows by time_period (one row per week).
447
- df = df[["first_day_of_week", "time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("time_period").agg(first_day_of_week=('first_day_of_week', 'min'), start_index_m3=('start_index_m3', 'min'), end_index_m3=('end_index_m3', 'max'), volume_m3=('volume_m3', 'sum'), energy_kwh=('energy_kwh', 'sum'), timestamp=('timestamp', 'min'), count=('energy_kwh', 'count')).reset_index()
448
-
449
- # Sort rows by first day of week ascending.
450
- df = df.sort_values(by=['first_day_of_week'])
451
-
452
- # Select rows where we have a full week (7 days) except for the current week.
453
- df = pd.concat([df[(df["count"] >= 7)], df.tail(1)[df.tail(1)["count"] < 7]])
454
-
455
- # Select target columns.
456
- df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]
457
-
458
- res = cast(List[Dict[str, Any]], df.to_dict('records'))
459
-
460
- return res
461
-
462
- # ------------------------------------------------------
463
- @staticmethod
464
- def computeMonthly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
465
-
466
- df = pd.DataFrame(daily)
467
-
468
- # Trimming head and trailing spaces and convert to datetime.
469
- df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)
470
-
471
- # Get the corresponding month-year.
472
- df["month_year"] = df["date_time"].apply(lambda x: FrequencyConverter.MONTHS[x.month - 1]).astype(str) + " " + df["date_time"].dt.strftime("%Y").astype(str)
473
-
474
- # Aggregate rows by month_year.
475
- df = df[["date_time", "month_year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("month_year").agg(first_day_of_month=('date_time', 'min'), start_index_m3=('start_index_m3', 'min'), end_index_m3=('end_index_m3', 'max'), volume_m3=('volume_m3', 'sum'), energy_kwh=('energy_kwh', 'sum'), timestamp=('timestamp', 'min'), count=('energy_kwh', 'count')).reset_index()
476
-
477
- # Sort rows by month ascending.
478
- df = df.sort_values(by=['first_day_of_month'])
479
-
480
- # Select rows where we have a full month (more than 27 days) except for the current month.
481
- df = pd.concat([df[(df["count"] >= 28)], df.tail(1)[df.tail(1)["count"] < 28]])
482
-
483
- # Rename columns for their target names.
484
- df = df.rename(columns={"month_year": "time_period"})
485
-
486
- # Select target columns.
487
- df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]
488
-
489
- res = cast(List[Dict[str, Any]], df.to_dict('records'))
490
-
491
- return res
492
-
493
- # ------------------------------------------------------
494
- @staticmethod
495
- def computeYearly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
496
-
497
- df = pd.DataFrame(daily)
498
-
499
- # Trimming head and trailing spaces and convert to datetime.
500
- df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)
501
-
502
- # Get the corresponding year.
503
- df["year"] = df["date_time"].dt.strftime("%Y")
504
-
505
- # Aggregate rows by year.
506
- df = df[["year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("year").agg(start_index_m3=('start_index_m3', 'min'), end_index_m3=('end_index_m3', 'max'), volume_m3=('volume_m3', 'sum'), energy_kwh=('energy_kwh', 'sum'), timestamp=('timestamp', 'min'), count=('energy_kwh', 'count')).reset_index()
507
-
508
- # Sort rows by year ascending.
509
- df = df.sort_values(by=['year'])
510
-
511
- # Select rows where we have almost a full year (at least 360 days) except for the current year.
512
- df = pd.concat([df[(df["count"] >= 360)], df.tail(1)[df.tail(1)["count"] < 360]])
513
-
514
- # Rename columns for their target names.
515
- df = df.rename(columns={"year": "time_period"})
516
-
517
- # Select target columns.
518
- df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]
519
-
520
- res = cast(List[Dict[str, Any]], df.to_dict('records'))
521
-
522
- return res
1
+ import logging
2
+ import glob
3
+ import os
4
+ import json
5
+ import time
6
+ import pandas as pd
7
+ import http.cookiejar
8
+ from abc import ABC, abstractmethod
9
+ from typing import Any, List, Dict, cast, Optional
10
+ from requests import Session
11
+ from datetime import date, timedelta
12
+ from pygazpar.enum import Frequency, PropertyName
13
+ from pygazpar.excelparser import ExcelParser
14
+ from pygazpar.jsonparser import JsonParser
15
+
16
+ SESSION_TOKEN_URL = "https://connexion.grdf.fr/api/v1/authn"
17
+ SESSION_TOKEN_PAYLOAD = """{{
18
+ "username": "{0}",
19
+ "password": "{1}",
20
+ "options": {{
21
+ "multiOptionalFactorEnroll": "false",
22
+ "warnBeforePasswordExpired": "false"
23
+ }}
24
+ }}"""
25
+
26
+ AUTH_TOKEN_URL = "https://connexion.grdf.fr/login/sessionCookieRedirect"
27
+ AUTH_TOKEN_PARAMS = """{{
28
+ "checkAccountSetupComplete": "true",
29
+ "token": "{0}",
30
+ "redirectUrl": "https://monespace.grdf.fr"
31
+ }}"""
32
+
33
+ Logger = logging.getLogger(__name__)
34
+
35
+ MeterReading = Dict[str, Any]
36
+
37
+ MeterReadings = List[MeterReading]
38
+
39
+ MeterReadingsByFrequency = Dict[str, MeterReadings]
40
+
41
+
42
+ # ------------------------------------------------------------------------------------------------------------
43
+ class IDataSource(ABC):
44
+
45
+ @abstractmethod
46
+ def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
47
+ pass
48
+
49
+
50
+ # ------------------------------------------------------------------------------------------------------------
51
+ class WebDataSource(IDataSource):
52
+
53
+ # ------------------------------------------------------
54
+ def __init__(self, username: str, password: str):
55
+
56
+ self.__username = username
57
+ self.__password = password
58
+
59
+ # ------------------------------------------------------
60
+ def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
61
+
62
+ self._login(self.__username, self.__password) # We ignore the return value.
63
+
64
+ res = self._loadFromSession(pceIdentifier, startDate, endDate, frequencies)
65
+
66
+ Logger.debug("The data update terminates normally")
67
+
68
+ return res
69
+
70
+ # ------------------------------------------------------
71
+ def _login(self, username: str, password: str) -> str:
72
+
73
+ session = Session()
74
+ session.headers.update({"domain": "grdf.fr"})
75
+ session.headers.update({"Content-Type": "application/json"})
76
+ session.headers.update({"X-Requested-With": "XMLHttpRequest"})
77
+
78
+ payload = SESSION_TOKEN_PAYLOAD.format(username, password)
79
+
80
+ response = session.post(SESSION_TOKEN_URL, data=payload)
81
+
82
+ if response.status_code != 200:
83
+ raise Exception(f"An error occurred while logging in. Status code: {response.status_code} - {response.text}")
84
+
85
+ session_token = response.json().get("sessionToken")
86
+
87
+ Logger.debug("Session token: %s", session_token)
88
+
89
+ jar = http.cookiejar.CookieJar()
90
+
91
+ self._session = Session()
92
+ self._session.headers.update({"Content-Type": "application/json"})
93
+ self._session.headers.update({"X-Requested-With": "XMLHttpRequest"})
94
+
95
+ params = json.loads(AUTH_TOKEN_PARAMS.format(session_token))
96
+
97
+ response = self._session.get(AUTH_TOKEN_URL, params=params, allow_redirects=True, cookies=jar) # type: ignore
98
+
99
+ if response.status_code != 200:
100
+ raise Exception(f"An error occurred while getting the auth token. Status code: {response.status_code} - {response.text}")
101
+
102
+ auth_token = self._session.cookies.get("auth_token", domain="monespace.grdf.fr")
103
+
104
+ return auth_token # type: ignore
105
+
106
+ @abstractmethod
107
+ def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
108
+ pass
109
+
110
+
111
+ # ------------------------------------------------------------------------------------------------------------
112
+ class ExcelWebDataSource(WebDataSource):
113
+
114
+ DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives/telecharger?dateDebut={0}&dateFin={1}&frequence={3}&pceList[]={2}"
115
+
116
+ DATE_FORMAT = "%Y-%m-%d"
117
+
118
+ FREQUENCY_VALUES = {
119
+ Frequency.HOURLY: "Horaire",
120
+ Frequency.DAILY: "Journalier",
121
+ Frequency.WEEKLY: "Hebdomadaire",
122
+ Frequency.MONTHLY: "Mensuel",
123
+ Frequency.YEARLY: "Journalier"
124
+ }
125
+
126
+ DATA_FILENAME = 'Donnees_informatives_*.xlsx'
127
+
128
+ # ------------------------------------------------------
129
+ def __init__(self, username: str, password: str, tmpDirectory: str):
130
+
131
+ super().__init__(username, password)
132
+
133
+ self.__tmpDirectory = tmpDirectory
134
+
135
+ # ------------------------------------------------------
136
+ def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
137
+
138
+ res = {}
139
+
140
+ # XLSX is in the TMP directory
141
+ data_file_path_pattern = self.__tmpDirectory + '/' + ExcelWebDataSource.DATA_FILENAME
142
+
143
+ # Remove any leftover data file (from a previous run that did not delete it).
144
+ file_list = glob.glob(data_file_path_pattern)
145
+ for filename in file_list:
146
+ if os.path.isfile(filename):
147
+ try:
148
+ os.remove(filename)
149
+ except PermissionError:
150
+ pass
151
+
152
+ if frequencies is None:
153
+ # Transform Enum in List.
154
+ frequencyList = [frequency for frequency in Frequency]
155
+ else:
156
+ # Get unique values.
157
+ frequencyList = set(frequencies)
158
+
159
+ for frequency in frequencyList:
160
+ # Inject parameters.
161
+ downloadUrl = ExcelWebDataSource.DATA_URL.format(startDate.strftime(ExcelWebDataSource.DATE_FORMAT), endDate.strftime(ExcelWebDataSource.DATE_FORMAT), pceIdentifier, ExcelWebDataSource.FREQUENCY_VALUES[frequency])
162
+
163
+ Logger.debug(f"Loading data of frequency {ExcelWebDataSource.FREQUENCY_VALUES[frequency]} from {startDate.strftime(ExcelWebDataSource.DATE_FORMAT)} to {endDate.strftime(ExcelWebDataSource.DATE_FORMAT)}")
164
+
165
+ # Retry mechanism.
166
+ retry = 10
167
+ while retry > 0:
168
+
169
+ try:
170
+ self.__downloadFile(self._session, downloadUrl, self.__tmpDirectory)
171
+ break
172
+ except Exception as e:
173
+
174
+ if retry == 1:
175
+ raise e
176
+
177
+ Logger.error("An error occurred while loading data. Retry in 3 seconds.")
178
+ time.sleep(3)
179
+ retry -= 1
180
+
181
+ # Load the XLSX file into the data structure
182
+ file_list = glob.glob(data_file_path_pattern)
183
+
184
+ if len(file_list) == 0:
185
+ Logger.warning(f"Not any data file has been found in '{self.__tmpDirectory}' directory")
186
+
187
+ for filename in file_list:
188
+ res[frequency.value] = ExcelParser.parse(filename, frequency if frequency != Frequency.YEARLY else Frequency.DAILY)
189
+ try:
190
+ # openpyxl does not close the file properly.
191
+ os.remove(filename)
192
+ except PermissionError:
193
+ pass
194
+
195
+ # We compute yearly from daily data.
196
+ if frequency == Frequency.YEARLY:
197
+ res[frequency.value] = FrequencyConverter.computeYearly(res[frequency.value])
198
+
199
+ return res
200
+
201
+ # ------------------------------------------------------
202
+ def __downloadFile(self, session: Session, url: str, path: str):
203
+
204
+ response = session.get(url)
205
+
206
+ if "text/html" in response.headers.get("Content-Type"): # type: ignore
207
+ raise Exception("An error occurred while loading data. Please check your credentials.")
208
+
209
+ if response.status_code != 200:
210
+ raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")
211
+
212
+ response.raise_for_status()
213
+
214
+ filename = response.headers["Content-Disposition"].split("filename=")[1]
215
+
216
+ open(f"{path}/{filename}", "wb").write(response.content)
217
+
218
+
219
+ # ------------------------------------------------------------------------------------------------------------
220
+ class ExcelFileDataSource(IDataSource):
221
+
222
+ def __init__(self, excelFile: str):
223
+
224
+ self.__excelFile = excelFile
225
+
226
+ def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
227
+
228
+ res = {}
229
+
230
+ if frequencies is None:
231
+ # Transform Enum in List.
232
+ frequencyList = [frequency for frequency in Frequency]
233
+ else:
234
+ # Get unique values.
235
+ frequencyList = set(frequencies)
236
+
237
+ for frequency in frequencyList:
238
+ if frequency != Frequency.YEARLY:
239
+ res[frequency.value] = ExcelParser.parse(self.__excelFile, frequency)
240
+ else:
241
+ daily = ExcelParser.parse(self.__excelFile, Frequency.DAILY)
242
+ res[frequency.value] = FrequencyConverter.computeYearly(daily)
243
+
244
+ return res
245
+
246
+
247
+ # ------------------------------------------------------------------------------------------------------------
248
class JsonWebDataSource(WebDataSource):
    """Web data source that downloads meter readings and temperatures as JSON.

    Both payloads are fetched through ``self._session`` (expected to be set up
    by ``WebDataSource``, which is not visible from this class) and handed to
    ``JsonParser.parse``. The resulting daily readings are then converted to
    every requested frequency with ``FrequencyConverter``.
    """

    # Consumption endpoint: {0} = start date, {1} = end date, {2} = PCE identifier.
    DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives?dateDebut={0}&dateFin={1}&pceList[]={2}"

    # Weather endpoint: {0} = PCE identifier, {1} = last day of the period, {2} = number of days.
    TEMPERATURES_URL = "https://monespace.grdf.fr/api/e-conso/pce/{0}/meteo?dateFinPeriode={1}&nbJours={2}"

    # Format of the dates injected into the URLs above.
    INPUT_DATE_FORMAT = "%Y-%m-%d"

    # Format used by FrequencyConverter to parse the daily "time_period" values.
    OUTPUT_DATE_FORMAT = "%d/%m/%Y"

    def __init__(self, username: str, password: str):
        """Forward the credentials to the parent web data source."""
        super().__init__(username, password)

    def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Download the readings of a PCE and aggregate them by frequency.

        Args:
            pceIdentifier: Identifier of the meter (PCE) to query.
            startDate: First day of the requested period.
            endDate: Last day of the requested period.
            frequencies: Frequencies to compute. ``None`` means every member
                of ``Frequency``; duplicates are ignored.

        Returns:
            A dictionary keyed by ``Frequency.value`` whose values are the
            readings computed for that frequency.

        Raises:
            Exception: If the consumption download still fails after 10
                attempts (HTML answer, non-200 status code or any error
                raised by the session). The last error is re-raised as is.
        """
        res: MeterReadingsByFrequency = {}

        computeByFrequency = {
            Frequency.HOURLY: FrequencyConverter.computeHourly,
            Frequency.DAILY: FrequencyConverter.computeDaily,
            Frequency.WEEKLY: FrequencyConverter.computeWeekly,
            Frequency.MONTHLY: FrequencyConverter.computeMonthly,
            Frequency.YEARLY: FrequencyConverter.computeYearly
        }

        # Data URL: Inject parameters.
        downloadUrl = JsonWebDataSource.DATA_URL.format(
            startDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT),
            endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT),
            pceIdentifier)

        # Retry mechanism: up to 10 attempts, 3 seconds apart.
        maxAttempts = 10
        for attempt in range(1, maxAttempts + 1):
            try:
                response = self._session.get(downloadUrl)

                # A missing Content-Type header must not be mistaken for a failure:
                # fall back to an empty string instead of testing membership in None.
                contentType = response.headers.get("Content-Type") or ""

                # An HTML page instead of JSON is reported as a credentials problem.
                if "text/html" in contentType:
                    raise Exception("An error occurred while loading data. Please check your credentials.")

                if response.status_code != 200:
                    raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")

                break
            except Exception:
                # Last attempt: give up and let the original error propagate.
                if attempt == maxAttempts:
                    raise

                Logger.error("An error occurred while loading data. Retry in 3 seconds.")
                time.sleep(3)

        data = response.text

        Logger.debug("Json meter data: %s", data)

        # Temperatures URL: Inject parameters.
        # The weather period never ends today or later: it is clamped to yesterday.
        today = date.today()
        temperaturesEndDate = today - timedelta(days=1) if endDate >= today else endDate
        # The number of requested days is capped at 730 (presumably an API limit - TODO confirm).
        days = min((temperaturesEndDate - startDate).days, 730)
        temperaturesUrl = JsonWebDataSource.TEMPERATURES_URL.format(
            pceIdentifier,
            temperaturesEndDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT),
            days)

        # Get weather data.
        temperatures = self._session.get(temperaturesUrl).text

        Logger.debug("Json temperature data: %s", temperatures)

        # Transform all the data into the target structure.
        daily = JsonParser.parse(data, temperatures, pceIdentifier)

        Logger.debug("Processed daily data: %s", daily)

        if frequencies is None:
            # Transform Enum in List.
            frequencyList = list(Frequency)
        else:
            # Get unique values while keeping the caller's order, so that the
            # key order of the result is deterministic.
            frequencyList = list(dict.fromkeys(frequencies))

        for frequency in frequencyList:
            res[frequency.value] = computeByFrequency[frequency](daily)

        return res
330
+
331
+
332
+ # ------------------------------------------------------------------------------------------------------------
333
class JsonFileDataSource(IDataSource):
    """Data source that reads consumption and temperature data from two local JSON files.

    The raw content of both files is handed to ``JsonParser.parse`` and the
    resulting daily readings are converted to every requested frequency with
    ``FrequencyConverter``.
    """

    def __init__(self, consumptionJsonFile: str, temperatureJsonFile: str):
        """Store the paths of the files to read.

        Args:
            consumptionJsonFile: Path of the JSON file holding the consumption data.
            temperatureJsonFile: Path of the JSON file holding the temperature data.
        """
        self.__consumptionJsonFile = consumptionJsonFile
        self.__temperatureJsonFile = temperatureJsonFile

    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Parse the two files and aggregate the readings by frequency.

        Args:
            pceIdentifier: Identifier of the meter (PCE), forwarded to the parser.
            startDate: Not used by this implementation.
            endDate: Not used by this implementation.
            frequencies: Frequencies to compute. ``None`` means every member
                of ``Frequency``; duplicates are ignored.

        Returns:
            A dictionary keyed by ``Frequency.value`` whose values are the
            readings computed for that frequency.

        Raises:
            OSError: If one of the files cannot be opened.
        """
        res: MeterReadingsByFrequency = {}

        # JSON text is read as UTF-8 explicitly, so that the result does not
        # depend on the platform's default encoding.
        with open(self.__consumptionJsonFile, encoding="utf-8") as consumptionJsonFile:
            with open(self.__temperatureJsonFile, encoding="utf-8") as temperatureJsonFile:
                daily = JsonParser.parse(consumptionJsonFile.read(), temperatureJsonFile.read(), pceIdentifier)

        computeByFrequency = {
            Frequency.HOURLY: FrequencyConverter.computeHourly,
            Frequency.DAILY: FrequencyConverter.computeDaily,
            Frequency.WEEKLY: FrequencyConverter.computeWeekly,
            Frequency.MONTHLY: FrequencyConverter.computeMonthly,
            Frequency.YEARLY: FrequencyConverter.computeYearly
        }

        if frequencies is None:
            # Transform Enum in List.
            frequencyList = list(Frequency)
        else:
            # Get unique values while keeping the caller's order, so that the
            # key order of the result is deterministic.
            frequencyList = list(dict.fromkeys(frequencies))

        for frequency in frequencyList:
            res[frequency.value] = computeByFrequency[frequency](daily)

        return res
367
+
368
+
369
+ # ------------------------------------------------------------------------------------------------------------
370
class TestDataSource(IDataSource):
    """Data source that serves the JSON sample files stored in the ``resources``
    directory located next to this module.
    """

    def __init__(self):
        """Nothing to initialise: the sample files are located at load time."""
        pass

    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Return the sample readings of every requested frequency.

        Args:
            pceIdentifier: Not used by this implementation.
            startDate: Not used by this implementation.
            endDate: Not used by this implementation.
            frequencies: Frequencies to load. ``None`` means every member of
                ``Frequency``; duplicates are ignored.

        Returns:
            A dictionary keyed by ``Frequency.value`` whose values are the
            decoded content of the matching sample file.

        Raises:
            OSError: If a sample file cannot be opened.
        """
        res: MeterReadingsByFrequency = {}

        dataSampleFilenameByFrequency = {
            Frequency.HOURLY: "hourly_data_sample.json",
            Frequency.DAILY: "daily_data_sample.json",
            Frequency.WEEKLY: "weekly_data_sample.json",
            Frequency.MONTHLY: "monthly_data_sample.json",
            Frequency.YEARLY: "yearly_data_sample.json"
        }

        if frequencies is None:
            # Transform Enum in List.
            frequencyList = list(Frequency)
        else:
            # Get unique values while keeping the caller's order, so that the
            # key order of the result is deterministic.
            frequencyList = list(dict.fromkeys(frequencies))

        # The samples live in the "resources" directory next to this module.
        resourcesDirectory = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources")

        for frequency in frequencyList:
            dataSampleFilename = os.path.join(resourcesDirectory, dataSampleFilenameByFrequency[frequency])

            # Explicit UTF-8 so that decoding does not depend on the platform's default encoding.
            with open(dataSampleFilename, encoding="utf-8") as jsonFile:
                res[frequency.value] = cast(List[Dict[PropertyName, Any]], json.load(jsonFile))

        return res
402
+
403
+
404
+ # ------------------------------------------------------------------------------------------------------------
405
class FrequencyConverter:
    """Convert a list of daily meter readings into other frequencies.

    The aggregating methods expect every daily reading to provide the keys
    ``time_period`` (a day formatted with ``JsonWebDataSource.OUTPUT_DATE_FORMAT``),
    ``start_index_m3``, ``end_index_m3``, ``volume_m3``, ``energy_kwh`` and
    ``timestamp``. For each period they keep the smallest start index, the
    largest end index, the summed volume and energy, and the smallest timestamp.
    """

    # Month names used to label the monthly periods (index 0 is January).
    MONTHS = [
        "Janvier",
        "Février",
        "Mars",
        "Avril",
        "Mai",
        "Juin",
        "Juillet",
        "Août",
        "Septembre",
        "Octobre",
        "Novembre",
        "Décembre"
    ]

    # ------------------------------------------------------
    @staticmethod
    def _toDataFrame(daily: List[Dict[str, Any]]) -> pd.DataFrame:
        """Load the readings in a DataFrame and add a parsed ``date_time`` column."""

        df = pd.DataFrame(daily)

        # Trimming head and trailing spaces and convert to datetime.
        df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)

        return df

    # ------------------------------------------------------
    @staticmethod
    def _keepCompletePeriods(df: pd.DataFrame, minimumCount: int) -> pd.DataFrame:
        """Keep the periods having at least ``minimumCount`` readings, plus the
        last row of ``df`` when it is still incomplete (the period in progress).
        """

        lastPeriod = df.tail(1)

        return pd.concat([df[df["count"] >= minimumCount], lastPeriod[lastPeriod["count"] < minimumCount]])

    # ------------------------------------------------------
    @staticmethod
    def computeHourly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return an empty list: no hourly reading is derived from daily ones."""

        return []

    # ------------------------------------------------------
    @staticmethod
    def computeDaily(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return the daily readings unchanged (the very same list object)."""

        return daily

    # ------------------------------------------------------
    @staticmethod
    def computeWeekly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by week (Monday to Sunday).

        Only the weeks having 7 readings are kept, plus the most recent week
        when it is still incomplete. Each resulting ``time_period`` is
        formatted as ``"Du <first day> au <last day>"``.

        Args:
            daily: Daily readings, see the class documentation for the expected keys.

        Returns:
            One reading per week, sorted by ascending week. An empty list
            when ``daily`` is empty.
        """

        # An empty DataFrame has no "time_period" column: nothing to aggregate.
        if not daily:
            return []

        df = FrequencyConverter._toDataFrame(daily)

        # Get the first day of week (%W: weeks start on Monday, %w = 1 is Monday).
        df["first_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 1"), format="%W %Y %w")

        # Get the last day of week (%w = 0 is Sunday).
        df["last_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 0"), format="%W %Y %w")

        # Reformat the time period.
        df["time_period"] = (
            "Du " + df["first_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str)
            + " au " + df["last_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str)
        )

        # Aggregate rows by week. "count" is the number of non-null energy values of the week.
        df = df[["first_day_of_week", "time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("time_period").agg(
            first_day_of_week=('first_day_of_week', 'min'),
            start_index_m3=('start_index_m3', 'min'),
            end_index_m3=('end_index_m3', 'max'),
            volume_m3=('volume_m3', 'sum'),
            energy_kwh=('energy_kwh', 'sum'),
            timestamp=('timestamp', 'min'),
            count=('energy_kwh', 'count')
        ).reset_index()

        # Sort rows by week ascending.
        df = df.sort_values(by=['first_day_of_week'])

        # Select rows where we have a full week (7 days) except for the current week.
        df = FrequencyConverter._keepCompletePeriods(df, 7)

        # Select target columns.
        df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]

        res = cast(List[Dict[str, Any]], df.to_dict('records'))

        return res

    # ------------------------------------------------------
    @staticmethod
    def computeMonthly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by month.

        Only the months having at least 28 readings are kept, plus the most
        recent month when it is still incomplete. Each resulting
        ``time_period`` is formatted as ``"<month name from MONTHS> <year>"``.

        Args:
            daily: Daily readings, see the class documentation for the expected keys.

        Returns:
            One reading per month, sorted by ascending month. An empty list
            when ``daily`` is empty.
        """

        # An empty DataFrame has no "time_period" column: nothing to aggregate.
        if not daily:
            return []

        df = FrequencyConverter._toDataFrame(daily)

        # Get the corresponding month-year.
        df["month_year"] = (
            df["date_time"].apply(lambda x: FrequencyConverter.MONTHS[x.month - 1]).astype(str)
            + " " + df["date_time"].dt.strftime("%Y").astype(str)
        )

        # Aggregate rows by month_year. "count" is the number of non-null energy values of the month.
        df = df[["date_time", "month_year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("month_year").agg(
            first_day_of_month=('date_time', 'min'),
            start_index_m3=('start_index_m3', 'min'),
            end_index_m3=('end_index_m3', 'max'),
            volume_m3=('volume_m3', 'sum'),
            energy_kwh=('energy_kwh', 'sum'),
            timestamp=('timestamp', 'min'),
            count=('energy_kwh', 'count')
        ).reset_index()

        # Sort rows by month ascending (on the earliest day read in each month).
        df = df.sort_values(by=['first_day_of_month'])

        # Select rows where we have a full month (more than 27 days) except for the current month.
        df = FrequencyConverter._keepCompletePeriods(df, 28)

        # Rename columns for their target names.
        df = df.rename(columns={"month_year": "time_period"})

        # Select target columns.
        df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]

        res = cast(List[Dict[str, Any]], df.to_dict('records'))

        return res

    # ------------------------------------------------------
    @staticmethod
    def computeYearly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by year.

        Only the years having at least 360 readings are kept, plus the most
        recent year when it is still incomplete. Each resulting
        ``time_period`` is the year as a 4-digit string.

        Args:
            daily: Daily readings, see the class documentation for the expected keys.

        Returns:
            One reading per year, sorted by ascending year. An empty list
            when ``daily`` is empty.
        """

        # An empty DataFrame has no "time_period" column: nothing to aggregate.
        if not daily:
            return []

        df = FrequencyConverter._toDataFrame(daily)

        # Get the corresponding year.
        df["year"] = df["date_time"].dt.strftime("%Y")

        # Aggregate rows by year. "count" is the number of non-null energy values of the year.
        df = df[["year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("year").agg(
            start_index_m3=('start_index_m3', 'min'),
            end_index_m3=('end_index_m3', 'max'),
            volume_m3=('volume_m3', 'sum'),
            energy_kwh=('energy_kwh', 'sum'),
            timestamp=('timestamp', 'min'),
            count=('energy_kwh', 'count')
        ).reset_index()

        # Sort rows by year ascending.
        df = df.sort_values(by=['year'])

        # Select rows where we have almost a full year (at least 360 days) except for the current year.
        df = FrequencyConverter._keepCompletePeriods(df, 360)

        # Rename columns for their target names.
        df = df.rename(columns={"year": "time_period"})

        # Select target columns.
        df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]

        res = cast(List[Dict[str, Any]], df.to_dict('records'))

        return res