pygazpar 1.3.0a15__py310-none-any.whl → 1.3.0a25__py310-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
pygazpar/datasource.py CHANGED
@@ -1,522 +1,528 @@
1
- import logging
2
- import glob
3
- import os
4
- import json
5
- import time
6
- import pandas as pd
7
- import http.cookiejar
8
- from abc import ABC, abstractmethod
9
- from typing import Any, List, Dict, cast, Optional
10
- from requests import Session
11
- from datetime import date, timedelta
12
- from pygazpar.enum import Frequency, PropertyName
13
- from pygazpar.excelparser import ExcelParser
14
- from pygazpar.jsonparser import JsonParser
15
-
16
- SESSION_TOKEN_URL = "https://connexion.grdf.fr/api/v1/authn"
17
- SESSION_TOKEN_PAYLOAD = """{{
18
- "username": "{0}",
19
- "password": "{1}",
20
- "options": {{
21
- "multiOptionalFactorEnroll": "false",
22
- "warnBeforePasswordExpired": "false"
23
- }}
24
- }}"""
25
-
26
- AUTH_TOKEN_URL = "https://connexion.grdf.fr/login/sessionCookieRedirect"
27
- AUTH_TOKEN_PARAMS = """{{
28
- "checkAccountSetupComplete": "true",
29
- "token": "{0}",
30
- "redirectUrl": "https://monespace.grdf.fr"
31
- }}"""
32
-
33
- Logger = logging.getLogger(__name__)
34
-
35
- MeterReading = Dict[str, Any]
36
-
37
- MeterReadings = List[MeterReading]
38
-
39
- MeterReadingsByFrequency = Dict[str, MeterReadings]
40
-
41
-
42
- # ------------------------------------------------------------------------------------------------------------
43
- class IDataSource(ABC):
44
-
45
- @abstractmethod
46
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
47
- pass
48
-
49
-
50
- # ------------------------------------------------------------------------------------------------------------
51
- class WebDataSource(IDataSource):
52
-
53
- # ------------------------------------------------------
54
- def __init__(self, username: str, password: str):
55
-
56
- self.__username = username
57
- self.__password = password
58
-
59
- # ------------------------------------------------------
60
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
61
-
62
- self._login(self.__username, self.__password) # We ignore the return value.
63
-
64
- res = self._loadFromSession(pceIdentifier, startDate, endDate, frequencies)
65
-
66
- Logger.debug("The data update terminates normally")
67
-
68
- return res
69
-
70
- # ------------------------------------------------------
71
- def _login(self, username: str, password: str) -> str:
72
-
73
- session = Session()
74
- session.headers.update({"domain": "grdf.fr"})
75
- session.headers.update({"Content-Type": "application/json"})
76
- session.headers.update({"X-Requested-With": "XMLHttpRequest"})
77
-
78
- payload = SESSION_TOKEN_PAYLOAD.format(username, password)
79
-
80
- response = session.post(SESSION_TOKEN_URL, data=payload)
81
-
82
- if response.status_code != 200:
83
- raise Exception(f"An error occurred while logging in. Status code: {response.status_code} - {response.text}")
84
-
85
- session_token = response.json().get("sessionToken")
86
-
87
- Logger.debug("Session token: %s", session_token)
88
-
89
- jar = http.cookiejar.CookieJar()
90
-
91
- self._session = Session()
92
- self._session.headers.update({"Content-Type": "application/json"})
93
- self._session.headers.update({"X-Requested-With": "XMLHttpRequest"})
94
-
95
- params = json.loads(AUTH_TOKEN_PARAMS.format(session_token))
96
-
97
- response = self._session.get(AUTH_TOKEN_URL, params=params, allow_redirects=True, cookies=jar) # type: ignore
98
-
99
- if response.status_code != 200:
100
- raise Exception(f"An error occurred while getting the auth token. Status code: {response.status_code} - {response.text}")
101
-
102
- auth_token = self._session.cookies.get("auth_token", domain="monespace.grdf.fr")
103
-
104
- return auth_token # type: ignore
105
-
106
- @abstractmethod
107
- def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
108
- pass
109
-
110
-
111
- # ------------------------------------------------------------------------------------------------------------
112
- class ExcelWebDataSource(WebDataSource):
113
-
114
- DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives/telecharger?dateDebut={0}&dateFin={1}&frequence={3}&pceList[]={2}"
115
-
116
- DATE_FORMAT = "%Y-%m-%d"
117
-
118
- FREQUENCY_VALUES = {
119
- Frequency.HOURLY: "Horaire",
120
- Frequency.DAILY: "Journalier",
121
- Frequency.WEEKLY: "Hebdomadaire",
122
- Frequency.MONTHLY: "Mensuel",
123
- Frequency.YEARLY: "Journalier"
124
- }
125
-
126
- DATA_FILENAME = 'Donnees_informatives_*.xlsx'
127
-
128
- # ------------------------------------------------------
129
- def __init__(self, username: str, password: str, tmpDirectory: str):
130
-
131
- super().__init__(username, password)
132
-
133
- self.__tmpDirectory = tmpDirectory
134
-
135
- # ------------------------------------------------------
136
- def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
137
-
138
- res = {}
139
-
140
- # XLSX is in the TMP directory
141
- data_file_path_pattern = self.__tmpDirectory + '/' + ExcelWebDataSource.DATA_FILENAME
142
-
143
- # We remove an eventual existing data file (from a previous run that has not deleted it).
144
- file_list = glob.glob(data_file_path_pattern)
145
- for filename in file_list:
146
- if os.path.isfile(filename):
147
- try:
148
- os.remove(filename)
149
- except PermissionError:
150
- pass
151
-
152
- if frequencies is None:
153
- # Transform Enum in List.
154
- frequencyList = [frequency for frequency in Frequency]
155
- else:
156
- # Get unique values.
157
- frequencyList = set(frequencies)
158
-
159
- for frequency in frequencyList:
160
- # Inject parameters.
161
- downloadUrl = ExcelWebDataSource.DATA_URL.format(startDate.strftime(ExcelWebDataSource.DATE_FORMAT), endDate.strftime(ExcelWebDataSource.DATE_FORMAT), pceIdentifier, ExcelWebDataSource.FREQUENCY_VALUES[frequency])
162
-
163
- Logger.debug(f"Loading data of frequency {ExcelWebDataSource.FREQUENCY_VALUES[frequency]} from {startDate.strftime(ExcelWebDataSource.DATE_FORMAT)} to {endDate.strftime(ExcelWebDataSource.DATE_FORMAT)}")
164
-
165
- # Retry mechanism.
166
- retry = 10
167
- while retry > 0:
168
-
169
- try:
170
- self.__downloadFile(self._session, downloadUrl, self.__tmpDirectory)
171
- break
172
- except Exception as e:
173
-
174
- if retry == 1:
175
- raise e
176
-
177
- Logger.error("An error occurred while loading data. Retry in 3 seconds.")
178
- time.sleep(3)
179
- retry -= 1
180
-
181
- # Load the XLSX file into the data structure
182
- file_list = glob.glob(data_file_path_pattern)
183
-
184
- if len(file_list) == 0:
185
- Logger.warning(f"Not any data file has been found in '{self.__tmpDirectory}' directory")
186
-
187
- for filename in file_list:
188
- res[frequency.value] = ExcelParser.parse(filename, frequency if frequency != Frequency.YEARLY else Frequency.DAILY)
189
- try:
190
- # openpyxl does not close the file properly.
191
- os.remove(filename)
192
- except PermissionError:
193
- pass
194
-
195
- # We compute yearly from daily data.
196
- if frequency == Frequency.YEARLY:
197
- res[frequency.value] = FrequencyConverter.computeYearly(res[frequency.value])
198
-
199
- return res
200
-
201
- # ------------------------------------------------------
202
- def __downloadFile(self, session: Session, url: str, path: str):
203
-
204
- response = session.get(url)
205
-
206
- if "text/html" in response.headers.get("Content-Type"): # type: ignore
207
- raise Exception("An error occurred while loading data. Please check your credentials.")
208
-
209
- if response.status_code != 200:
210
- raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")
211
-
212
- response.raise_for_status()
213
-
214
- filename = response.headers["Content-Disposition"].split("filename=")[1]
215
-
216
- open(f"{path}/{filename}", "wb").write(response.content)
217
-
218
-
219
- # ------------------------------------------------------------------------------------------------------------
220
- class ExcelFileDataSource(IDataSource):
221
-
222
- def __init__(self, excelFile: str):
223
-
224
- self.__excelFile = excelFile
225
-
226
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
227
-
228
- res = {}
229
-
230
- if frequencies is None:
231
- # Transform Enum in List.
232
- frequencyList = [frequency for frequency in Frequency]
233
- else:
234
- # Get unique values.
235
- frequencyList = set(frequencies)
236
-
237
- for frequency in frequencyList:
238
- if frequency != Frequency.YEARLY:
239
- res[frequency.value] = ExcelParser.parse(self.__excelFile, frequency)
240
- else:
241
- daily = ExcelParser.parse(self.__excelFile, Frequency.DAILY)
242
- res[frequency.value] = FrequencyConverter.computeYearly(daily)
243
-
244
- return res
245
-
246
-
247
- # ------------------------------------------------------------------------------------------------------------
248
- class JsonWebDataSource(WebDataSource):
249
-
250
- DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives?dateDebut={0}&dateFin={1}&pceList[]={2}"
251
-
252
- TEMPERATURES_URL = "https://monespace.grdf.fr/api/e-conso/pce/{0}/meteo?dateFinPeriode={1}&nbJours={2}"
253
-
254
- INPUT_DATE_FORMAT = "%Y-%m-%d"
255
-
256
- OUTPUT_DATE_FORMAT = "%d/%m/%Y"
257
-
258
- def __init__(self, username: str, password: str):
259
-
260
- super().__init__(username, password)
261
-
262
- def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
263
-
264
- res = {}
265
-
266
- computeByFrequency = {
267
- Frequency.HOURLY: FrequencyConverter.computeHourly,
268
- Frequency.DAILY: FrequencyConverter.computeDaily,
269
- Frequency.WEEKLY: FrequencyConverter.computeWeekly,
270
- Frequency.MONTHLY: FrequencyConverter.computeMonthly,
271
- Frequency.YEARLY: FrequencyConverter.computeYearly
272
- }
273
-
274
- # Data URL: Inject parameters.
275
- downloadUrl = JsonWebDataSource.DATA_URL.format(startDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), pceIdentifier)
276
-
277
- # Retry mechanism.
278
- retry = 10
279
- while retry > 0:
280
-
281
- try:
282
- response = self._session.get(downloadUrl)
283
-
284
- if "text/html" in response.headers.get("Content-Type"): # type: ignore
285
- raise Exception("An error occurred while loading data. Please check your credentials.")
286
-
287
- if response.status_code != 200:
288
- raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")
289
-
290
- break
291
- except Exception as e:
292
-
293
- if retry == 1:
294
- raise e
295
-
296
- Logger.error("An error occurred while loading data. Retry in 3 seconds.")
297
- time.sleep(3)
298
- retry -= 1
299
-
300
- data = response.text
301
-
302
- # Temperatures URL: Inject parameters.
303
- endDate = date.today() - timedelta(days=1) if endDate >= date.today() else endDate
304
- days = min((endDate - startDate).days, 730)
305
- temperaturesUrl = JsonWebDataSource.TEMPERATURES_URL.format(pceIdentifier, endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), days)
306
-
307
- # Get weather data.
308
- temperatures = self._session.get(temperaturesUrl).text
309
-
310
- # Transform all the data into the target structure.
311
- daily = JsonParser.parse(data, temperatures, pceIdentifier)
312
-
313
- if frequencies is None:
314
- # Transform Enum in List.
315
- frequencyList = [frequency for frequency in Frequency]
316
- else:
317
- # Get unique values.
318
- frequencyList = set(frequencies)
319
-
320
- for frequency in frequencyList:
321
- res[frequency.value] = computeByFrequency[frequency](daily)
322
-
323
- return res
324
-
325
-
326
- # ------------------------------------------------------------------------------------------------------------
327
- class JsonFileDataSource(IDataSource):
328
-
329
- def __init__(self, consumptionJsonFile: str, temperatureJsonFile):
330
-
331
- self.__consumptionJsonFile = consumptionJsonFile
332
- self.__temperatureJsonFile = temperatureJsonFile
333
-
334
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
335
-
336
- res = {}
337
-
338
- with open(self.__consumptionJsonFile) as consumptionJsonFile:
339
- with open(self.__temperatureJsonFile) as temperatureJsonFile:
340
- daily = JsonParser.parse(consumptionJsonFile.read(), temperatureJsonFile.read(), pceIdentifier)
341
-
342
- computeByFrequency = {
343
- Frequency.HOURLY: FrequencyConverter.computeHourly,
344
- Frequency.DAILY: FrequencyConverter.computeDaily,
345
- Frequency.WEEKLY: FrequencyConverter.computeWeekly,
346
- Frequency.MONTHLY: FrequencyConverter.computeMonthly,
347
- Frequency.YEARLY: FrequencyConverter.computeYearly
348
- }
349
-
350
- if frequencies is None:
351
- # Transform Enum in List.
352
- frequencyList = [frequency for frequency in Frequency]
353
- else:
354
- # Get unique values.
355
- frequencyList = set(frequencies)
356
-
357
- for frequency in frequencyList:
358
- res[frequency.value] = computeByFrequency[frequency](daily)
359
-
360
- return res
361
-
362
-
363
- # ------------------------------------------------------------------------------------------------------------
364
- class TestDataSource(IDataSource):
365
-
366
- def __init__(self):
367
-
368
- pass
369
-
370
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
371
-
372
- res = {}
373
-
374
- dataSampleFilenameByFrequency = {
375
- Frequency.HOURLY: "hourly_data_sample.json",
376
- Frequency.DAILY: "daily_data_sample.json",
377
- Frequency.WEEKLY: "weekly_data_sample.json",
378
- Frequency.MONTHLY: "monthly_data_sample.json",
379
- Frequency.YEARLY: "yearly_data_sample.json"
380
- }
381
-
382
- if frequencies is None:
383
- # Transform Enum in List.
384
- frequencyList = [frequency for frequency in Frequency]
385
- else:
386
- # Get unique values.
387
- frequencyList = set(frequencies)
388
-
389
- for frequency in frequencyList:
390
- dataSampleFilename = f"{os.path.dirname(os.path.abspath(__file__))}/resources/{dataSampleFilenameByFrequency[frequency]}"
391
-
392
- with open(dataSampleFilename) as jsonFile:
393
- res[frequency.value] = cast(List[Dict[PropertyName, Any]], json.load(jsonFile))
394
-
395
- return res
396
-
397
-
398
- # ------------------------------------------------------------------------------------------------------------
399
- class FrequencyConverter:
400
-
401
- MONTHS = [
402
- "Janvier",
403
- "Février",
404
- "Mars",
405
- "Avril",
406
- "Mai",
407
- "Juin",
408
- "Juillet",
409
- "Août",
410
- "Septembre",
411
- "Octobre",
412
- "Novembre",
413
- "Décembre"
414
- ]
415
-
416
- # ------------------------------------------------------
417
- @staticmethod
418
- def computeHourly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
419
-
420
- return []
421
-
422
- # ------------------------------------------------------
423
- @staticmethod
424
- def computeDaily(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
425
-
426
- return daily
427
-
428
- # ------------------------------------------------------
429
- @staticmethod
430
- def computeWeekly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
431
-
432
- df = pd.DataFrame(daily)
433
-
434
- # Trimming head and trailing spaces and convert to datetime.
435
- df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)
436
-
437
- # Get the first day of week.
438
- df["first_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 1"), format="%W %Y %w")
439
-
440
- # Get the last day of week.
441
- df["last_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 0"), format="%W %Y %w")
442
-
443
- # Reformat the time period.
444
- df["time_period"] = "Du " + df["first_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str) + " au " + df["last_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str)
445
-
446
- # Aggregate rows by month_year.
447
- df = df[["first_day_of_week", "time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("time_period").agg(first_day_of_week=('first_day_of_week', 'min'), start_index_m3=('start_index_m3', 'min'), end_index_m3=('end_index_m3', 'max'), volume_m3=('volume_m3', 'sum'), energy_kwh=('energy_kwh', 'sum'), timestamp=('timestamp', 'min'), count=('energy_kwh', 'count')).reset_index()
448
-
449
- # Sort rows by month ascending.
450
- df = df.sort_values(by=['first_day_of_week'])
451
-
452
- # Select rows where we have a full week (7 days) except for the current week.
453
- df = pd.concat([df[(df["count"] >= 7)], df.tail(1)[df.tail(1)["count"] < 7]])
454
-
455
- # Select target columns.
456
- df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]
457
-
458
- res = cast(List[Dict[str, Any]], df.to_dict('records'))
459
-
460
- return res
461
-
462
- # ------------------------------------------------------
463
- @staticmethod
464
- def computeMonthly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
465
-
466
- df = pd.DataFrame(daily)
467
-
468
- # Trimming head and trailing spaces and convert to datetime.
469
- df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)
470
-
471
- # Get the corresponding month-year.
472
- df["month_year"] = df["date_time"].apply(lambda x: FrequencyConverter.MONTHS[x.month - 1]).astype(str) + " " + df["date_time"].dt.strftime("%Y").astype(str)
473
-
474
- # Aggregate rows by month_year.
475
- df = df[["date_time", "month_year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("month_year").agg(first_day_of_month=('date_time', 'min'), start_index_m3=('start_index_m3', 'min'), end_index_m3=('end_index_m3', 'max'), volume_m3=('volume_m3', 'sum'), energy_kwh=('energy_kwh', 'sum'), timestamp=('timestamp', 'min'), count=('energy_kwh', 'count')).reset_index()
476
-
477
- # Sort rows by month ascending.
478
- df = df.sort_values(by=['first_day_of_month'])
479
-
480
- # Select rows where we have a full month (more than 27 days) except for the current month.
481
- df = pd.concat([df[(df["count"] >= 28)], df.tail(1)[df.tail(1)["count"] < 28]])
482
-
483
- # Rename columns for their target names.
484
- df = df.rename(columns={"month_year": "time_period"})
485
-
486
- # Select target columns.
487
- df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]
488
-
489
- res = cast(List[Dict[str, Any]], df.to_dict('records'))
490
-
491
- return res
492
-
493
- # ------------------------------------------------------
494
- @staticmethod
495
- def computeYearly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
496
-
497
- df = pd.DataFrame(daily)
498
-
499
- # Trimming head and trailing spaces and convert to datetime.
500
- df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)
501
-
502
- # Get the corresponding year.
503
- df["year"] = df["date_time"].dt.strftime("%Y")
504
-
505
- # Aggregate rows by month_year.
506
- df = df[["year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("year").agg(start_index_m3=('start_index_m3', 'min'), end_index_m3=('end_index_m3', 'max'), volume_m3=('volume_m3', 'sum'), energy_kwh=('energy_kwh', 'sum'), timestamp=('timestamp', 'min'), count=('energy_kwh', 'count')).reset_index()
507
-
508
- # Sort rows by month ascending.
509
- df = df.sort_values(by=['year'])
510
-
511
- # Select rows where we have almost a full year (more than 360) except for the current year.
512
- df = pd.concat([df[(df["count"] >= 360)], df.tail(1)[df.tail(1)["count"] < 360]])
513
-
514
- # Rename columns for their target names.
515
- df = df.rename(columns={"year": "time_period"})
516
-
517
- # Select target columns.
518
- df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]
519
-
520
- res = cast(List[Dict[str, Any]], df.to_dict('records'))
521
-
522
- return res
1
+ import logging
2
+ import glob
3
+ import os
4
+ import json
5
+ import time
6
+ import pandas as pd
7
+ import http.cookiejar
8
+ from abc import ABC, abstractmethod
9
+ from typing import Any, List, Dict, cast, Optional
10
+ from requests import Session
11
+ from datetime import date, timedelta
12
+ from pygazpar.enum import Frequency, PropertyName
13
+ from pygazpar.excelparser import ExcelParser
14
+ from pygazpar.jsonparser import JsonParser
15
+
16
+ SESSION_TOKEN_URL = "https://connexion.grdf.fr/api/v1/authn"
17
+ SESSION_TOKEN_PAYLOAD = """{{
18
+ "username": "{0}",
19
+ "password": "{1}",
20
+ "options": {{
21
+ "multiOptionalFactorEnroll": "false",
22
+ "warnBeforePasswordExpired": "false"
23
+ }}
24
+ }}"""
25
+
26
+ AUTH_TOKEN_URL = "https://connexion.grdf.fr/login/sessionCookieRedirect"
27
+ AUTH_TOKEN_PARAMS = """{{
28
+ "checkAccountSetupComplete": "true",
29
+ "token": "{0}",
30
+ "redirectUrl": "https://monespace.grdf.fr"
31
+ }}"""
32
+
33
+ Logger = logging.getLogger(__name__)
34
+
35
+ MeterReading = Dict[str, Any]
36
+
37
+ MeterReadings = List[MeterReading]
38
+
39
+ MeterReadingsByFrequency = Dict[str, MeterReadings]
40
+
41
+
42
+ # ------------------------------------------------------------------------------------------------------------
43
+ class IDataSource(ABC):
44
+
45
+ @abstractmethod
46
+ def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
47
+ pass
48
+
49
+
50
+ # ------------------------------------------------------------------------------------------------------------
51
+ class WebDataSource(IDataSource):
52
+
53
+ # ------------------------------------------------------
54
+ def __init__(self, username: str, password: str):
55
+
56
+ self.__username = username
57
+ self.__password = password
58
+
59
+ # ------------------------------------------------------
60
+ def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
61
+
62
+ self._login(self.__username, self.__password) # We ignore the return value.
63
+
64
+ res = self._loadFromSession(pceIdentifier, startDate, endDate, frequencies)
65
+
66
+ Logger.debug("The data update terminates normally")
67
+
68
+ return res
69
+
70
+ # ------------------------------------------------------
71
+ def _login(self, username: str, password: str) -> str:
72
+
73
+ session = Session()
74
+ session.headers.update({"domain": "grdf.fr"})
75
+ session.headers.update({"Content-Type": "application/json"})
76
+ session.headers.update({"X-Requested-With": "XMLHttpRequest"})
77
+
78
+ payload = SESSION_TOKEN_PAYLOAD.format(username, password)
79
+
80
+ response = session.post(SESSION_TOKEN_URL, data=payload)
81
+
82
+ if response.status_code != 200:
83
+ raise Exception(f"An error occurred while logging in. Status code: {response.status_code} - {response.text}")
84
+
85
+ session_token = response.json().get("sessionToken")
86
+
87
+ Logger.debug("Session token: %s", session_token)
88
+
89
+ jar = http.cookiejar.CookieJar()
90
+
91
+ self._session = Session()
92
+ self._session.headers.update({"Content-Type": "application/json"})
93
+ self._session.headers.update({"X-Requested-With": "XMLHttpRequest"})
94
+
95
+ params = json.loads(AUTH_TOKEN_PARAMS.format(session_token))
96
+
97
+ response = self._session.get(AUTH_TOKEN_URL, params=params, allow_redirects=True, cookies=jar) # type: ignore
98
+
99
+ if response.status_code != 200:
100
+ raise Exception(f"An error occurred while getting the auth token. Status code: {response.status_code} - {response.text}")
101
+
102
+ auth_token = self._session.cookies.get("auth_token", domain="monespace.grdf.fr")
103
+
104
+ return auth_token # type: ignore
105
+
106
+ @abstractmethod
107
+ def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
108
+ pass
109
+
110
+
111
+ # ------------------------------------------------------------------------------------------------------------
112
+ class ExcelWebDataSource(WebDataSource):
113
+
114
+ DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives/telecharger?dateDebut={0}&dateFin={1}&frequence={3}&pceList[]={2}"
115
+
116
+ DATE_FORMAT = "%Y-%m-%d"
117
+
118
+ FREQUENCY_VALUES = {
119
+ Frequency.HOURLY: "Horaire",
120
+ Frequency.DAILY: "Journalier",
121
+ Frequency.WEEKLY: "Hebdomadaire",
122
+ Frequency.MONTHLY: "Mensuel",
123
+ Frequency.YEARLY: "Journalier"
124
+ }
125
+
126
+ DATA_FILENAME = 'Donnees_informatives_*.xlsx'
127
+
128
+ # ------------------------------------------------------
129
+ def __init__(self, username: str, password: str, tmpDirectory: str):
130
+
131
+ super().__init__(username, password)
132
+
133
+ self.__tmpDirectory = tmpDirectory
134
+
135
+ # ------------------------------------------------------
136
+ def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
137
+
138
+ res = {}
139
+
140
+ # XLSX is in the TMP directory
141
+ data_file_path_pattern = self.__tmpDirectory + '/' + ExcelWebDataSource.DATA_FILENAME
142
+
143
+ # We remove an eventual existing data file (from a previous run that has not deleted it).
144
+ file_list = glob.glob(data_file_path_pattern)
145
+ for filename in file_list:
146
+ if os.path.isfile(filename):
147
+ try:
148
+ os.remove(filename)
149
+ except PermissionError:
150
+ pass
151
+
152
+ if frequencies is None:
153
+ # Transform Enum in List.
154
+ frequencyList = [frequency for frequency in Frequency]
155
+ else:
156
+ # Get unique values.
157
+ frequencyList = set(frequencies)
158
+
159
+ for frequency in frequencyList:
160
+ # Inject parameters.
161
+ downloadUrl = ExcelWebDataSource.DATA_URL.format(startDate.strftime(ExcelWebDataSource.DATE_FORMAT), endDate.strftime(ExcelWebDataSource.DATE_FORMAT), pceIdentifier, ExcelWebDataSource.FREQUENCY_VALUES[frequency])
162
+
163
+ Logger.debug(f"Loading data of frequency {ExcelWebDataSource.FREQUENCY_VALUES[frequency]} from {startDate.strftime(ExcelWebDataSource.DATE_FORMAT)} to {endDate.strftime(ExcelWebDataSource.DATE_FORMAT)}")
164
+
165
+ # Retry mechanism.
166
+ retry = 10
167
+ while retry > 0:
168
+
169
+ try:
170
+ self.__downloadFile(self._session, downloadUrl, self.__tmpDirectory)
171
+ break
172
+ except Exception as e:
173
+
174
+ if retry == 1:
175
+ raise e
176
+
177
+ Logger.error("An error occurred while loading data. Retry in 3 seconds.")
178
+ time.sleep(3)
179
+ retry -= 1
180
+
181
+ # Load the XLSX file into the data structure
182
+ file_list = glob.glob(data_file_path_pattern)
183
+
184
+ if len(file_list) == 0:
185
+ Logger.warning(f"Not any data file has been found in '{self.__tmpDirectory}' directory")
186
+
187
+ for filename in file_list:
188
+ res[frequency.value] = ExcelParser.parse(filename, frequency if frequency != Frequency.YEARLY else Frequency.DAILY)
189
+ try:
190
+ # openpyxl does not close the file properly.
191
+ os.remove(filename)
192
+ except PermissionError:
193
+ pass
194
+
195
+ # We compute yearly from daily data.
196
+ if frequency == Frequency.YEARLY:
197
+ res[frequency.value] = FrequencyConverter.computeYearly(res[frequency.value])
198
+
199
+ return res
200
+
201
+ # ------------------------------------------------------
202
+ def __downloadFile(self, session: Session, url: str, path: str):
203
+
204
+ response = session.get(url)
205
+
206
+ if "text/html" in response.headers.get("Content-Type"): # type: ignore
207
+ raise Exception("An error occurred while loading data. Please check your credentials.")
208
+
209
+ if response.status_code != 200:
210
+ raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")
211
+
212
+ response.raise_for_status()
213
+
214
+ filename = response.headers["Content-Disposition"].split("filename=")[1]
215
+
216
+ open(f"{path}/{filename}", "wb").write(response.content)
217
+
218
+
219
+ # ------------------------------------------------------------------------------------------------------------
220
+ class ExcelFileDataSource(IDataSource):
221
+
222
+ def __init__(self, excelFile: str):
223
+
224
+ self.__excelFile = excelFile
225
+
226
+ def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
227
+
228
+ res = {}
229
+
230
+ if frequencies is None:
231
+ # Transform Enum in List.
232
+ frequencyList = [frequency for frequency in Frequency]
233
+ else:
234
+ # Get unique values.
235
+ frequencyList = set(frequencies)
236
+
237
+ for frequency in frequencyList:
238
+ if frequency != Frequency.YEARLY:
239
+ res[frequency.value] = ExcelParser.parse(self.__excelFile, frequency)
240
+ else:
241
+ daily = ExcelParser.parse(self.__excelFile, Frequency.DAILY)
242
+ res[frequency.value] = FrequencyConverter.computeYearly(daily)
243
+
244
+ return res
245
+
246
+
247
+ # ------------------------------------------------------------------------------------------------------------
248
class JsonWebDataSource(WebDataSource):
    """Web data source that reads meter readings from the GRDF JSON endpoints.

    Consumption and temperature payloads are downloaded with ``self._session``
    (not set in this class; presumably provided by ``WebDataSource``), turned
    into daily readings by ``JsonParser.parse`` and then aggregated per
    requested frequency by ``FrequencyConverter``.
    """

    DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives?dateDebut={0}&dateFin={1}&pceList[]={2}"

    TEMPERATURES_URL = "https://monespace.grdf.fr/api/e-conso/pce/{0}/meteo?dateFinPeriode={1}&nbJours={2}"

    # Date format injected into the request URLs.
    INPUT_DATE_FORMAT = "%Y-%m-%d"

    # Date format FrequencyConverter uses to parse the daily "time_period" values.
    OUTPUT_DATE_FORMAT = "%d/%m/%Y"

    def __init__(self, username: str, password: str):

        super().__init__(username, password)

    def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Download the readings of a PCE and aggregate them by frequency.

        Args:
            pceIdentifier: Identifier of the PCE injected into both URLs.
            startDate: First day of the requested period.
            endDate: Last day of the requested period.
            frequencies: Frequencies to compute; ``None`` means every member
                of ``Frequency``. Duplicates are ignored.

        Returns:
            A dictionary mapping each requested ``Frequency.value`` to its
            list of readings.

        Raises:
            Exception: When the consumption request still fails (HTML answer
                or status code other than 200) on the tenth attempt.
        """

        res: MeterReadingsByFrequency = {}

        computeByFrequency = {
            Frequency.HOURLY: FrequencyConverter.computeHourly,
            Frequency.DAILY: FrequencyConverter.computeDaily,
            Frequency.WEEKLY: FrequencyConverter.computeWeekly,
            Frequency.MONTHLY: FrequencyConverter.computeMonthly,
            Frequency.YEARLY: FrequencyConverter.computeYearly
        }

        # Data URL: Inject parameters.
        downloadUrl = JsonWebDataSource.DATA_URL.format(startDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), pceIdentifier)

        # Retry mechanism: up to 10 attempts, 3 seconds apart.
        retry = 10
        while retry > 0:

            try:
                response = self._session.get(downloadUrl)

                # Default to "" so that a missing Content-Type header does not
                # turn the membership test into a TypeError.
                if "text/html" in response.headers.get("Content-Type", ""):
                    raise Exception("An error occurred while loading data. Please check your credentials.")

                if response.status_code != 200:
                    raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")

                break
            except Exception:

                if retry == 1:
                    # Last attempt: let the error propagate with its original traceback.
                    raise

                Logger.error("An error occurred while loading data. Retry in 3 seconds.")
                time.sleep(3)
                retry -= 1

        data = response.text

        Logger.debug("Json meter data: %s", data)

        # Temperatures URL: Inject parameters.
        # The period requested for temperatures ends yesterday at the latest.
        today = date.today()
        if endDate >= today:
            endDate = today - timedelta(days=1)

        # Number of days is capped at 730 (presumably an API limit - to be confirmed).
        days = min((endDate - startDate).days, 730)
        temperaturesUrl = JsonWebDataSource.TEMPERATURES_URL.format(pceIdentifier, endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), days)

        # Get weather data.
        temperatures = self._session.get(temperaturesUrl).text

        Logger.debug("Json temperature data: %s", temperatures)

        # Transform all the data into the target structure.
        daily = JsonParser.parse(data, temperatures, pceIdentifier)

        Logger.debug("Processed daily data: %s", daily)

        if frequencies is None:
            # Transform Enum in List.
            frequencyList = list(Frequency)
        else:
            # Get unique values, keeping the order given by the caller.
            frequencyList = list(dict.fromkeys(frequencies))

        for frequency in frequencyList:
            res[frequency.value] = computeByFrequency[frequency](daily)

        return res
330
+
331
+
332
+ # ------------------------------------------------------------------------------------------------------------
333
class JsonFileDataSource(IDataSource):
    """Data source that reads consumption and temperature data from two JSON files."""

    def __init__(self, consumptionJsonFile: str, temperatureJsonFile: str):
        """Store the paths of the files read by ``load``.

        Args:
            consumptionJsonFile: Path of the JSON file holding the consumption data.
            temperatureJsonFile: Path of the JSON file holding the temperature data.
        """

        self.__consumptionJsonFile = consumptionJsonFile
        self.__temperatureJsonFile = temperatureJsonFile

    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Parse both files and aggregate the readings by frequency.

        Args:
            pceIdentifier: Identifier of the PCE, forwarded to ``JsonParser.parse``.
            startDate: Not used by this data source.
            endDate: Not used by this data source.
            frequencies: Frequencies to compute; ``None`` means every member
                of ``Frequency``. Duplicates are ignored.

        Returns:
            A dictionary mapping each requested ``Frequency.value`` to its
            list of readings.
        """

        res: MeterReadingsByFrequency = {}

        # Read the files as UTF-8 instead of relying on the platform default encoding.
        with open(self.__consumptionJsonFile, encoding="utf-8") as consumptionJsonFile, \
                open(self.__temperatureJsonFile, encoding="utf-8") as temperatureJsonFile:
            daily = JsonParser.parse(consumptionJsonFile.read(), temperatureJsonFile.read(), pceIdentifier)

        computeByFrequency = {
            Frequency.HOURLY: FrequencyConverter.computeHourly,
            Frequency.DAILY: FrequencyConverter.computeDaily,
            Frequency.WEEKLY: FrequencyConverter.computeWeekly,
            Frequency.MONTHLY: FrequencyConverter.computeMonthly,
            Frequency.YEARLY: FrequencyConverter.computeYearly
        }

        if frequencies is None:
            # Transform Enum in List.
            frequencyList = list(Frequency)
        else:
            # Get unique values, keeping the order given by the caller.
            frequencyList = list(dict.fromkeys(frequencies))

        for frequency in frequencyList:
            res[frequency.value] = computeByFrequency[frequency](daily)

        return res
367
+
368
+
369
+ # ------------------------------------------------------------------------------------------------------------
370
class TestDataSource(IDataSource):
    """Data source that returns the JSON samples stored in the ``resources`` directory next to this module."""

    def __init__(self):

        pass

    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Load one sample file per requested frequency.

        Args:
            pceIdentifier: Not used by this data source.
            startDate: Not used by this data source.
            endDate: Not used by this data source.
            frequencies: Frequencies to load; ``None`` means every member of
                ``Frequency``. Duplicates are ignored.

        Returns:
            A dictionary mapping each requested ``Frequency.value`` to the
            content of the matching sample file.
        """

        res: MeterReadingsByFrequency = {}

        dataSampleFilenameByFrequency = {
            Frequency.HOURLY: "hourly_data_sample.json",
            Frequency.DAILY: "daily_data_sample.json",
            Frequency.WEEKLY: "weekly_data_sample.json",
            Frequency.MONTHLY: "monthly_data_sample.json",
            Frequency.YEARLY: "yearly_data_sample.json"
        }

        if frequencies is None:
            # Transform Enum in List.
            frequencyList = list(Frequency)
        else:
            # Get unique values, keeping the order given by the caller.
            frequencyList = list(dict.fromkeys(frequencies))

        # The samples live in the "resources" directory next to this module.
        resourcesDirectory = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources")

        for frequency in frequencyList:
            dataSampleFilename = os.path.join(resourcesDirectory, dataSampleFilenameByFrequency[frequency])

            # Read the sample as UTF-8 instead of relying on the platform default encoding.
            with open(dataSampleFilename, encoding="utf-8") as jsonFile:
                res[frequency.value] = cast(List[Dict[PropertyName, Any]], json.load(jsonFile))

        return res
402
+
403
+
404
+ # ------------------------------------------------------------------------------------------------------------
405
class FrequencyConverter:
    """Aggregate daily meter readings into weekly, monthly or yearly readings.

    Every ``compute*`` method takes the list of daily readings and returns a
    list of readings for its own frequency. The aggregating methods read the
    keys ``time_period``, ``start_index_m3``, ``end_index_m3``, ``volume_m3``,
    ``energy_kwh`` and ``timestamp`` of each daily reading and return records
    with those same keys.
    """

    # French month names, indexed by month number minus one.
    MONTHS = [
        "Janvier",
        "Février",
        "Mars",
        "Avril",
        "Mai",
        "Juin",
        "Juillet",
        "Août",
        "Septembre",
        "Octobre",
        "Novembre",
        "Décembre"
    ]

    # ------------------------------------------------------
    @staticmethod
    def computeHourly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return an empty list: no hourly reading is derived from the daily ones."""

        return []

    # ------------------------------------------------------
    @staticmethod
    def computeDaily(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return the daily readings unchanged."""

        return daily

    # ------------------------------------------------------
    @staticmethod
    def computeWeekly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by week (Monday to Sunday).

        Weeks with fewer than 7 readings are dropped, except the last one.
        The ``time_period`` of each record is ``"Du <first day> au <last day>"``.

        Args:
            daily: Daily readings; may be empty.

        Returns:
            One record per kept week, sorted by first day of week.
        """

        # An empty DataFrame has no "time_period" column to work on.
        if not daily:
            return []

        df = FrequencyConverter._toDataFrame(daily)

        # Get the first day of week: "%W" is the Monday-based week number and weekday 1 is Monday.
        df["first_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 1"), format="%W %Y %w")

        # Get the last day of week: weekday 0 is Sunday.
        df["last_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 0"), format="%W %Y %w")

        # Reformat the time period.
        df["time_period"] = "Du " + df["first_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str) + " au " + df["last_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str)

        # Aggregate rows by week.
        df = df[["first_day_of_week", "time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("time_period").agg(first_day_of_week=('first_day_of_week', 'min'), start_index_m3=('start_index_m3', 'min'), end_index_m3=('end_index_m3', 'max'), volume_m3=('volume_m3', 'sum'), energy_kwh=('energy_kwh', 'sum'), timestamp=('timestamp', 'min'), count=('energy_kwh', 'count')).reset_index()

        # Sort rows by week ascending.
        df = df.sort_values(by=['first_day_of_week'])

        # Select rows where we have a full week (7 days) except for the current week.
        df = FrequencyConverter._keepCompletePeriods(df, 7)

        return FrequencyConverter._toRecords(df)

    # ------------------------------------------------------
    @staticmethod
    def computeMonthly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by month.

        Months with fewer than 28 readings are dropped, except the last one.
        The ``time_period`` of each record is ``"<French month name> <year>"``.

        Args:
            daily: Daily readings; may be empty.

        Returns:
            One record per kept month, sorted by first reading date.
        """

        # An empty DataFrame has no "time_period" column to work on.
        if not daily:
            return []

        df = FrequencyConverter._toDataFrame(daily)

        # Get the corresponding month-year.
        df["month_year"] = df["date_time"].apply(lambda x: FrequencyConverter.MONTHS[x.month - 1]).astype(str) + " " + df["date_time"].dt.strftime("%Y").astype(str)

        # Aggregate rows by month_year.
        df = df[["date_time", "month_year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("month_year").agg(first_day_of_month=('date_time', 'min'), start_index_m3=('start_index_m3', 'min'), end_index_m3=('end_index_m3', 'max'), volume_m3=('volume_m3', 'sum'), energy_kwh=('energy_kwh', 'sum'), timestamp=('timestamp', 'min'), count=('energy_kwh', 'count')).reset_index()

        # Sort rows by month ascending.
        df = df.sort_values(by=['first_day_of_month'])

        # Select rows where we have a full month (more than 27 days) except for the current month.
        df = FrequencyConverter._keepCompletePeriods(df, 28)

        # Rename columns for their target names.
        df = df.rename(columns={"month_year": "time_period"})

        return FrequencyConverter._toRecords(df)

    # ------------------------------------------------------
    @staticmethod
    def computeYearly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by year.

        Years with fewer than 360 readings are dropped, except the last one.
        The ``time_period`` of each record is the four-digit year as a string.

        Args:
            daily: Daily readings; may be empty.

        Returns:
            One record per kept year, sorted by year.
        """

        # An empty DataFrame has no "time_period" column to work on.
        if not daily:
            return []

        df = FrequencyConverter._toDataFrame(daily)

        # Get the corresponding year.
        df["year"] = df["date_time"].dt.strftime("%Y")

        # Aggregate rows by year.
        df = df[["year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("year").agg(start_index_m3=('start_index_m3', 'min'), end_index_m3=('end_index_m3', 'max'), volume_m3=('volume_m3', 'sum'), energy_kwh=('energy_kwh', 'sum'), timestamp=('timestamp', 'min'), count=('energy_kwh', 'count')).reset_index()

        # Sort rows by year ascending.
        df = df.sort_values(by=['year'])

        # Select rows where we have almost a full year (more than 360) except for the current year.
        df = FrequencyConverter._keepCompletePeriods(df, 360)

        # Rename columns for their target names.
        df = df.rename(columns={"year": "time_period"})

        return FrequencyConverter._toRecords(df)

    # ------------------------------------------------------
    @staticmethod
    def _toDataFrame(daily: List[Dict[str, Any]]) -> pd.DataFrame:
        """Build a DataFrame from the daily readings and add a parsed ``date_time`` column."""

        df = pd.DataFrame(daily)

        # Trimming head and trailing spaces and convert to datetime.
        df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)

        return df

    # ------------------------------------------------------
    @staticmethod
    def _keepCompletePeriods(df: pd.DataFrame, minCount: int) -> pd.DataFrame:
        """Keep the rows whose ``count`` reaches ``minCount``, plus the last row when it does not."""

        lastRow = df.tail(1)

        return pd.concat([df[df["count"] >= minCount], lastRow[lastRow["count"] < minCount]])

    # ------------------------------------------------------
    @staticmethod
    def _toRecords(df: pd.DataFrame) -> List[Dict[str, Any]]:
        """Select the target columns and convert the rows to a list of dictionaries."""

        # Select target columns.
        df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]

        return cast(List[Dict[str, Any]], df.to_dict('records'))