pygazpar 1.2.8__py312-none-any.whl → 1.3.0a11__py312-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
pygazpar/datasource.py CHANGED
@@ -1,528 +1,538 @@
1
- import logging
2
- import glob
3
- import os
4
- import json
5
- import time
6
- import pandas as pd
7
- import http.cookiejar
8
- from abc import ABC, abstractmethod
9
- from typing import Any, List, Dict, cast, Optional
10
- from requests import Session
11
- from datetime import date, timedelta
12
- from pygazpar.enum import Frequency, PropertyName
13
- from pygazpar.excelparser import ExcelParser
14
- from pygazpar.jsonparser import JsonParser
15
-
16
# Endpoint WebDataSource._login posts the credentials to; the JSON reply is
# read for its "sessionToken" entry.
SESSION_TOKEN_URL = "https://connexion.grdf.fr/api/v1/authn"
# str.format() template of the login request body: {0} receives the username,
# {1} the password. Doubled braces become literal braces once formatted.
SESSION_TOKEN_PAYLOAD = """{{
"username": "{0}",
"password": "{1}",
"options": {{
"multiOptionalFactorEnroll": "false",
"warnBeforePasswordExpired": "false"
}}
}}"""

# Endpoint WebDataSource._login calls with the session token (redirects are followed).
AUTH_TOKEN_URL = "https://connexion.grdf.fr/login/sessionCookieRedirect"
# str.format() template, parsed with json.loads() into the query parameters of
# AUTH_TOKEN_URL: {0} receives the session token.
AUTH_TOKEN_PARAMS = """{{
"checkAccountSetupComplete": "true",
"token": "{0}",
"redirectUrl": "https://monespace.grdf.fr"
}}"""

# Module-level logger shared by every data source of this file.
Logger = logging.getLogger(__name__)

# One reading, as a mapping with string keys.
MeterReading = Dict[str, Any]

# A sequence of readings.
MeterReadings = List[MeterReading]

# Readings grouped under a string key; the load() implementations of this file
# use Frequency.value as the key.
MeterReadingsByFrequency = Dict[str, MeterReadings]
40
-
41
-
42
- # ------------------------------------------------------------------------------------------------------------
43
class IDataSource(ABC):
    """Contract shared by every provider of meter readings."""

    @abstractmethod
    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Return the readings of `pceIdentifier`, grouped by frequency.

        Args:
            pceIdentifier: identifier of the meter to read.
            startDate: first day of the requested period.
            endDate: last day of the requested period.
            frequencies: frequencies to return; the implementations of this
                file treat None as "every Frequency".

        Returns:
            A mapping whose values are lists of readings.
        """
48
-
49
-
50
- # ------------------------------------------------------------------------------------------------------------
51
class WebDataSource(IDataSource):
    """Base class of the data sources that query the GRDF web site.

    `load` logs in first, which leaves a `requests` session in `self._session`,
    then hands over to `_loadFromSession`, implemented by each subclass.
    """

    # ------------------------------------------------------
    def __init__(self, username: str, password: str):
        """Keep the credentials for later `load` calls; nothing is sent here."""
        self.__username = username
        self.__password = password

    # ------------------------------------------------------
    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Log in, then return whatever `_loadFromSession` produces."""
        self._login(self.__username, self.__password)  # We ignore the return value.

        res = self._loadFromSession(pceIdentifier, startDate, endDate, frequencies)

        Logger.debug("The data update terminates normally")

        return res

    # ------------------------------------------------------
    def _login(self, username: str, password: str) -> str:
        """Authenticate and store the resulting session in `self._session`.

        Args:
            username: account login.
            password: account password.

        Returns:
            The value of the "auth_token" cookie of domain monespace.grdf.fr,
            as returned by the cookie jar (None when that cookie is absent).

        Raises:
            Exception: when either request answers with a status other than 200.
        """
        # json.dumps() escapes quotes, backslashes, control characters and
        # non-ASCII characters. Dropping its surrounding quotes gives text that
        # can be injected between the quotes of the template, so a password
        # such as 'a"b\\c' no longer produces an invalid JSON body.
        payload = SESSION_TOKEN_PAYLOAD.format(json.dumps(username)[1:-1], json.dumps(password)[1:-1])

        # This first session is only needed for one request: release it afterwards.
        with Session() as session:
            session.headers.update({
                "domain": "grdf.fr",
                "Content-Type": "application/json",
                "X-Requested-With": "XMLHttpRequest",
            })
            response = session.post(SESSION_TOKEN_URL, data=payload)

        if response.status_code != 200:
            raise Exception(f"An error occurred while logging in. Status code: {response.status_code} - {response.text}")

        session_token = response.json().get("sessionToken")

        # NOTE(review): this writes a credential to the debug log.
        Logger.debug("Session token: %s", session_token)

        jar = http.cookiejar.CookieJar()

        # This session is kept: subclasses use it to download the data.
        self._session = Session()
        self._session.headers.update({
            "Content-Type": "application/json",
            "X-Requested-With": "XMLHttpRequest",
        })

        params = json.loads(AUTH_TOKEN_PARAMS.format(session_token))

        response = self._session.get(AUTH_TOKEN_URL, params=params, allow_redirects=True, cookies=jar)  # type: ignore

        if response.status_code != 200:
            raise Exception(f"An error occurred while getting the auth token. Status code: {response.status_code} - {response.text}")

        auth_token = self._session.cookies.get("auth_token", domain="monespace.grdf.fr")

        return auth_token  # type: ignore

    @abstractmethod
    def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Retrieve the readings with `self._session`; same contract as `load`."""
109
-
110
-
111
- # ------------------------------------------------------------------------------------------------------------
112
class ExcelWebDataSource(WebDataSource):
    """Web data source that downloads one XLSX export per frequency and parses it."""

    DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives/telecharger?dateDebut={0}&dateFin={1}&frequence={3}&pceList[]={2}"

    DATE_FORMAT = "%Y-%m-%d"

    # Value of the "frequence" URL parameter. YEARLY requests the daily export:
    # the yearly figures are computed locally from it.
    FREQUENCY_VALUES = {
        Frequency.HOURLY: "Horaire",
        Frequency.DAILY: "Journalier",
        Frequency.WEEKLY: "Hebdomadaire",
        Frequency.MONTHLY: "Mensuel",
        Frequency.YEARLY: "Journalier"
    }

    # Glob pattern of the downloaded files inside the temporary directory.
    DATA_FILENAME = 'Donnees_informatives_*.xlsx'

    # ------------------------------------------------------
    def __init__(self, username: str, password: str, tmpDirectory: str):
        """Store the credentials and the directory receiving the downloads."""
        super().__init__(username, password)

        self.__tmpDirectory = tmpDirectory

    # ------------------------------------------------------
    def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Download and parse the export of each requested frequency.

        Args:
            pceIdentifier: identifier injected in the download URL.
            startDate: start of the period, sent as "dateDebut".
            endDate: end of the period, sent as "dateFin".
            frequencies: frequencies to load; None loads every Frequency.

        Returns:
            The parsed readings keyed by `Frequency.value`. A frequency for
            which no file was found in the temporary directory has no entry.

        Raises:
            Exception: the last download error once the 10 attempts are used up.
        """
        res = {}

        # XLSX is in the TMP directory
        data_file_path_pattern = self.__tmpDirectory + '/' + ExcelWebDataSource.DATA_FILENAME

        # We remove an eventual existing data file (from a previous run that has not deleted it).
        for filename in glob.glob(data_file_path_pattern):
            if os.path.isfile(filename):
                try:
                    os.remove(filename)
                except PermissionError:
                    # Best effort only: the file may still be held open.
                    pass

        # None means every frequency; otherwise duplicates are dropped.
        frequencyList = list(Frequency) if frequencies is None else set(frequencies)

        startText = startDate.strftime(ExcelWebDataSource.DATE_FORMAT)
        endText = endDate.strftime(ExcelWebDataSource.DATE_FORMAT)

        for frequency in frequencyList:
            frequencyLabel = ExcelWebDataSource.FREQUENCY_VALUES[frequency]

            # Inject parameters.
            downloadUrl = ExcelWebDataSource.DATA_URL.format(startText, endText, pceIdentifier, frequencyLabel)

            Logger.debug(f"Loading data of frequency {frequencyLabel} from {startText} to {endText}")

            # Retry mechanism: up to 10 attempts, 3 seconds apart.
            retry = 10
            while retry > 0:

                try:
                    self.__downloadFile(self._session, downloadUrl, self.__tmpDirectory)
                    break
                except Exception:

                    if retry == 1:
                        raise

                    Logger.error("An error occurred while loading data. Retry in 3 seconds.")
                    time.sleep(3)
                    retry -= 1

            # Load the XLSX file into the data structure
            file_list = glob.glob(data_file_path_pattern)

            if len(file_list) == 0:
                Logger.warning(f"Not any data file has been found in '{self.__tmpDirectory}' directory")

            for filename in file_list:
                res[frequency.value] = ExcelParser.parse(filename, frequency if frequency != Frequency.YEARLY else Frequency.DAILY)
                try:
                    # openpyxl does not close the file properly.
                    os.remove(filename)
                except PermissionError:
                    pass

            # We compute yearly from daily data. The membership test avoids a
            # KeyError when no file was found for this frequency.
            if frequency == Frequency.YEARLY and frequency.value in res:
                res[frequency.value] = FrequencyConverter.computeYearly(res[frequency.value])

        return res

    # ------------------------------------------------------
    def __downloadFile(self, session: Session, url: str, path: str):
        """Fetch `url` with `session` and save the body under directory `path`.

        The file name is taken from the "Content-Disposition" response header.

        Raises:
            Exception: on an HTML answer or on a status other than 200.
            KeyError: when the "Content-Disposition" header is missing.
        """
        response = session.get(url)

        # The default keeps the membership test valid when the header is absent.
        if "text/html" in response.headers.get("Content-Type", ""):
            raise Exception("An error occurred while loading data. Please check your credentials.")

        if response.status_code != 200:
            raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")

        filename = response.headers["Content-Disposition"].split("filename=")[1]

        # The context manager closes the file even if the write fails.
        with open(f"{path}/{filename}", "wb") as dataFile:
            dataFile.write(response.content)
217
-
218
-
219
- # ------------------------------------------------------------------------------------------------------------
220
class ExcelFileDataSource(IDataSource):
    """Data source that parses a file already present on disk with ExcelParser."""

    def __init__(self, excelFile: str):
        """Remember the path handed to ExcelParser on each `load`."""
        self.__excelFile = excelFile

    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Parse the file once per requested frequency.

        `pceIdentifier`, `startDate` and `endDate` are not used by this source.
        `frequencies` set to None selects every Frequency.

        Returns:
            The parsed readings keyed by `Frequency.value`.
        """
        selection = list(Frequency) if frequencies is None else set(frequencies)

        readings = {}
        for frequency in selection:
            if frequency == Frequency.YEARLY:
                # Yearly figures are derived from the daily ones.
                dailyReadings = ExcelParser.parse(self.__excelFile, Frequency.DAILY)
                parsed = FrequencyConverter.computeYearly(dailyReadings)
            else:
                parsed = ExcelParser.parse(self.__excelFile, frequency)
            readings[frequency.value] = parsed

        return readings
245
-
246
-
247
- # ------------------------------------------------------------------------------------------------------------
248
class JsonWebDataSource(WebDataSource):
    """Web data source that reads the JSON consumption and weather endpoints."""

    DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives?dateDebut={0}&dateFin={1}&pceList[]={2}"

    TEMPERATURES_URL = "https://monespace.grdf.fr/api/e-conso/pce/{0}/meteo?dateFinPeriode={1}&nbJours={2}"

    # Format of the dates injected in the URLs.
    INPUT_DATE_FORMAT = "%Y-%m-%d"

    # Format FrequencyConverter uses to parse and print the "time_period" values.
    OUTPUT_DATE_FORMAT = "%d/%m/%Y"

    def __init__(self, username: str, password: str):
        """Store the credentials through the base class."""
        super().__init__(username, password)

    def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Download the consumption and weather data, then derive each frequency.

        Args:
            pceIdentifier: identifier injected in both URLs.
            startDate: start of the requested period.
            endDate: end of the requested period.
            frequencies: frequencies to compute; None computes every Frequency.

        Returns:
            The readings keyed by `Frequency.value`.

        Raises:
            Exception: the last download error once the 10 attempts are used up.
        """
        res = {}

        computeByFrequency = {
            Frequency.HOURLY: FrequencyConverter.computeHourly,
            Frequency.DAILY: FrequencyConverter.computeDaily,
            Frequency.WEEKLY: FrequencyConverter.computeWeekly,
            Frequency.MONTHLY: FrequencyConverter.computeMonthly,
            Frequency.YEARLY: FrequencyConverter.computeYearly
        }

        # Data URL: Inject parameters.
        downloadUrl = JsonWebDataSource.DATA_URL.format(startDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), pceIdentifier)

        # Retry mechanism: up to 10 attempts, 3 seconds apart.
        retry = 10
        while retry > 0:

            try:
                response = self._session.get(downloadUrl)

                # The default keeps the membership test valid when the header is absent.
                if "text/html" in response.headers.get("Content-Type", ""):
                    raise Exception("An error occurred while loading data. Please check your credentials.")

                if response.status_code != 200:
                    raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")

                break
            except Exception:

                if retry == 1:
                    raise

                Logger.error("An error occurred while loading data. Retry in 3 seconds.")
                time.sleep(3)
                retry -= 1

        data = response.text

        Logger.debug("Json meter data: %s", data)

        # Temperatures URL: Inject parameters. The weather period ends yesterday
        # at the latest and spans at most 730 days.
        today = date.today()
        temperaturesEndDate = today - timedelta(days=1) if endDate >= today else endDate
        days = min((temperaturesEndDate - startDate).days, 730)
        temperaturesUrl = JsonWebDataSource.TEMPERATURES_URL.format(pceIdentifier, temperaturesEndDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), days)

        # Get weather data.
        temperatures = self._session.get(temperaturesUrl).text

        Logger.debug("Json temperature data: %s", temperatures)

        # Transform all the data into the target structure.
        daily = JsonParser.parse(data, temperatures, pceIdentifier)

        Logger.debug("Processed daily data: %s", daily)

        # None means every frequency; otherwise duplicates are dropped.
        frequencyList = list(Frequency) if frequencies is None else set(frequencies)

        for frequency in frequencyList:
            res[frequency.value] = computeByFrequency[frequency](daily)

        return res
330
-
331
-
332
- # ------------------------------------------------------------------------------------------------------------
333
class JsonFileDataSource(IDataSource):
    """Data source that reads the consumption and temperature JSON from two files."""

    def __init__(self, consumptionJsonFile: str, temperatureJsonFile):
        """Remember the paths of the two files opened by `load`."""
        self.__consumptionJsonFile = consumptionJsonFile
        self.__temperatureJsonFile = temperatureJsonFile

    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Parse both files, then derive the readings of each requested frequency.

        `startDate` and `endDate` are not used by this source. `frequencies`
        set to None selects every Frequency.

        Returns:
            The readings keyed by `Frequency.value`.
        """
        with open(self.__consumptionJsonFile) as consumptionStream, open(self.__temperatureJsonFile) as temperatureStream:
            daily = JsonParser.parse(consumptionStream.read(), temperatureStream.read(), pceIdentifier)

        # One converter per frequency, each fed with the daily readings.
        converters = {
            Frequency.HOURLY: FrequencyConverter.computeHourly,
            Frequency.DAILY: FrequencyConverter.computeDaily,
            Frequency.WEEKLY: FrequencyConverter.computeWeekly,
            Frequency.MONTHLY: FrequencyConverter.computeMonthly,
            Frequency.YEARLY: FrequencyConverter.computeYearly
        }

        selection = list(Frequency) if frequencies is None else set(frequencies)

        return {frequency.value: converters[frequency](daily) for frequency in selection}
367
-
368
-
369
- # ------------------------------------------------------------------------------------------------------------
370
class TestDataSource(IDataSource):
    """Data source that returns the JSON samples stored in the `resources` directory."""

    def __init__(self):
        """Nothing to configure."""

    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Load the sample file of each requested frequency.

        `pceIdentifier`, `startDate` and `endDate` are not used by this source.
        `frequencies` set to None selects every Frequency.

        Returns:
            The content of each sample file keyed by `Frequency.value`.
        """
        sampleNames = {
            Frequency.HOURLY: "hourly_data_sample.json",
            Frequency.DAILY: "daily_data_sample.json",
            Frequency.WEEKLY: "weekly_data_sample.json",
            Frequency.MONTHLY: "monthly_data_sample.json",
            Frequency.YEARLY: "yearly_data_sample.json"
        }

        selection = list(Frequency) if frequencies is None else set(frequencies)

        # The samples live next to this module.
        resourcesDirectory = f"{os.path.dirname(os.path.abspath(__file__))}/resources"

        samples = {}
        for frequency in selection:
            with open(f"{resourcesDirectory}/{sampleNames[frequency]}") as sampleFile:
                samples[frequency.value] = cast(List[Dict[PropertyName, Any]], json.load(sampleFile))

        return samples
402
-
403
-
404
- # ------------------------------------------------------------------------------------------------------------
405
class FrequencyConverter:
    """Derive hourly, weekly, monthly and yearly readings from daily readings.

    The aggregating converters read the keys "time_period" (a day formatted
    with JsonWebDataSource.OUTPUT_DATE_FORMAT), "start_index_m3",
    "end_index_m3", "volume_m3", "energy_kwh" and "timestamp" of each reading.
    """

    # French month names, indexed by month number minus one.
    MONTHS = [
        "Janvier",
        "Février",
        "Mars",
        "Avril",
        "Mai",
        "Juin",
        "Juillet",
        "Août",
        "Septembre",
        "Octobre",
        "Novembre",
        "Décembre"
    ]

    # Columns of the records returned by the aggregating converters.
    _TARGET_COLUMNS = ["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]

    # ------------------------------------------------------
    @staticmethod
    def _keepCompletePeriods(df, minCount: int):
        """Keep the rows counting at least `minCount` days, plus the last row when it is shorter."""
        lastRow = df.tail(1)
        return pd.concat([df[df["count"] >= minCount], lastRow[lastRow["count"] < minCount]])

    # ------------------------------------------------------
    @staticmethod
    def computeHourly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return an empty list: no hourly reading is derived from daily ones."""
        return []

    # ------------------------------------------------------
    @staticmethod
    def computeDaily(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return `daily` itself, unchanged."""
        return daily

    # ------------------------------------------------------
    @staticmethod
    def computeWeekly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by week.

        Each result is labelled "Du <first day> au <last day>". Weeks with
        fewer than 7 readings are dropped, except the last one.

        Returns:
            One record per kept week, oldest first; [] when `daily` is empty.
        """
        # An empty frame has no "time_period" column: answer before touching it.
        if not daily:
            return []

        df = pd.DataFrame(daily)

        # Trimming head and trailing spaces and convert to datetime.
        df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)

        # Get the first day of week.
        df["first_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 1"), format="%W %Y %w")

        # Get the last day of week.
        df["last_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 0"), format="%W %Y %w")

        # Reformat the time period.
        df["time_period"] = "Du " + df["first_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str) + " au " + df["last_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str)

        # Aggregate rows by week.
        df = df[["first_day_of_week", "time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("time_period").agg(
            first_day_of_week=('first_day_of_week', 'min'),
            start_index_m3=('start_index_m3', 'min'),
            end_index_m3=('end_index_m3', 'max'),
            volume_m3=('volume_m3', 'sum'),
            energy_kwh=('energy_kwh', 'sum'),
            timestamp=('timestamp', 'min'),
            count=('energy_kwh', 'count')
        ).reset_index()

        # Sort rows by week ascending.
        df = df.sort_values(by=['first_day_of_week'])

        # Select rows where we have a full week (7 days) except for the current week.
        df = FrequencyConverter._keepCompletePeriods(df, 7)

        # Select target columns.
        df = df[FrequencyConverter._TARGET_COLUMNS]

        return cast(List[Dict[str, Any]], df.to_dict('records'))

    # ------------------------------------------------------
    @staticmethod
    def computeMonthly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by month.

        Each result is labelled "<French month name> <year>". Months with
        fewer than 28 readings are dropped, except the last one.

        Returns:
            One record per kept month, oldest first; [] when `daily` is empty.
        """
        # An empty frame has no "time_period" column: answer before touching it.
        if not daily:
            return []

        df = pd.DataFrame(daily)

        # Trimming head and trailing spaces and convert to datetime.
        df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)

        # Get the corresponding month-year.
        df["month_year"] = df["date_time"].apply(lambda x: FrequencyConverter.MONTHS[x.month - 1]).astype(str) + " " + df["date_time"].dt.strftime("%Y").astype(str)

        # Aggregate rows by month_year.
        df = df[["date_time", "month_year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("month_year").agg(
            first_day_of_month=('date_time', 'min'),
            start_index_m3=('start_index_m3', 'min'),
            end_index_m3=('end_index_m3', 'max'),
            volume_m3=('volume_m3', 'sum'),
            energy_kwh=('energy_kwh', 'sum'),
            timestamp=('timestamp', 'min'),
            count=('energy_kwh', 'count')
        ).reset_index()

        # Sort rows by month ascending.
        df = df.sort_values(by=['first_day_of_month'])

        # Select rows where we have a full month (more than 27 days) except for the current month.
        df = FrequencyConverter._keepCompletePeriods(df, 28)

        # Rename columns for their target names.
        df = df.rename(columns={"month_year": "time_period"})

        # Select target columns.
        df = df[FrequencyConverter._TARGET_COLUMNS]

        return cast(List[Dict[str, Any]], df.to_dict('records'))

    # ------------------------------------------------------
    @staticmethod
    def computeYearly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by year.

        Each result is labelled with the four-digit year. Years with fewer
        than 360 readings are dropped, except the last one.

        Returns:
            One record per kept year, oldest first; [] when `daily` is empty.
        """
        # An empty frame has no "time_period" column: answer before touching it.
        if not daily:
            return []

        df = pd.DataFrame(daily)

        # Trimming head and trailing spaces and convert to datetime.
        df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)

        # Get the corresponding year.
        df["year"] = df["date_time"].dt.strftime("%Y")

        # Aggregate rows by year.
        df = df[["year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("year").agg(
            start_index_m3=('start_index_m3', 'min'),
            end_index_m3=('end_index_m3', 'max'),
            volume_m3=('volume_m3', 'sum'),
            energy_kwh=('energy_kwh', 'sum'),
            timestamp=('timestamp', 'min'),
            count=('energy_kwh', 'count')
        ).reset_index()

        # Sort rows by year ascending.
        df = df.sort_values(by=['year'])

        # Select rows where we have almost a full year (more than 360) except for the current year.
        df = FrequencyConverter._keepCompletePeriods(df, 360)

        # Rename columns for their target names.
        df = df.rename(columns={"year": "time_period"})

        # Select target columns.
        df = df[FrequencyConverter._TARGET_COLUMNS]

        return cast(List[Dict[str, Any]], df.to_dict('records'))
1
+ import logging
2
+ import glob
3
+ import os
4
+ import json
5
+ import time
6
+ import pandas as pd
7
+ import http.cookiejar
8
+ from abc import ABC, abstractmethod
9
+ from typing import Any, List, Dict, cast, Optional
10
+ from requests import Session
11
+ from datetime import date, timedelta
12
+ from pygazpar.enum import Frequency, PropertyName
13
+ from pygazpar.excelparser import ExcelParser
14
+ from pygazpar.jsonparser import JsonParser
15
+
16
# Endpoint WebDataSource._login posts the credentials to; the JSON reply is
# read for its "sessionToken" entry.
SESSION_TOKEN_URL = "https://connexion.grdf.fr/api/v1/authn"
# str.format() template of the login request body: {0} receives the username,
# {1} the password. Doubled braces become literal braces once formatted.
SESSION_TOKEN_PAYLOAD = """{{
"username": "{0}",
"password": "{1}",
"options": {{
"multiOptionalFactorEnroll": "false",
"warnBeforePasswordExpired": "false"
}}
}}"""

# Endpoint WebDataSource._login calls with the session token (redirects are followed).
AUTH_TOKEN_URL = "https://connexion.grdf.fr/login/sessionCookieRedirect"
# str.format() template, parsed with json.loads() into the query parameters of
# AUTH_TOKEN_URL: {0} receives the session token.
AUTH_TOKEN_PARAMS = """{{
"checkAccountSetupComplete": "true",
"token": "{0}",
"redirectUrl": "https://monespace.grdf.fr"
}}"""

# Module-level logger shared by every data source of this file.
Logger = logging.getLogger(__name__)

# One reading, as a mapping with string keys.
MeterReading = Dict[str, Any]

# A sequence of readings.
MeterReadings = List[MeterReading]

# Readings grouped under a string key; the load() implementations of this file
# use Frequency.value as the key.
MeterReadingsByFrequency = Dict[str, MeterReadings]
40
+
41
+
42
+ # ------------------------------------------------------------------------------------------------------------
43
class IDataSource(ABC):
    """Contract shared by every provider of meter readings."""

    @abstractmethod
    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Return the readings of `pceIdentifier`, grouped by frequency.

        Args:
            pceIdentifier: identifier of the meter to read.
            startDate: first day of the requested period.
            endDate: last day of the requested period.
            frequencies: frequencies to return; the implementations of this
                file treat None as "every Frequency".

        Returns:
            A mapping whose values are lists of readings.
        """
48
+
49
+
50
+ # ------------------------------------------------------------------------------------------------------------
51
class WebDataSource(IDataSource):
    """Base class of the data sources that query the GRDF web site.

    `load` logs in to obtain an auth token, then passes that token to
    `_loadFromSession`, implemented by each subclass.
    """

    # ------------------------------------------------------
    def __init__(self, username: str, password: str):
        """Keep the credentials for later `load` calls; nothing is sent here."""
        self.__username = username
        self.__password = password

    # ------------------------------------------------------
    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Log in, then return whatever `_loadFromSession` produces."""
        auth_token = self._login(self.__username, self.__password)

        res = self._loadFromSession(auth_token, pceIdentifier, startDate, endDate, frequencies)

        Logger.debug("The data update terminates normally")

        return res

    # ------------------------------------------------------
    def _login(self, username: str, password: str) -> str:
        """Authenticate and return the auth token.

        Args:
            username: account login.
            password: account password.

        Returns:
            The value of the "auth_token" cookie of domain monespace.grdf.fr.

        Raises:
            Exception: when either request answers with a status other than
                200, or when the "auth_token" cookie is absent afterwards.
        """
        # json.dumps() escapes quotes, backslashes, control characters and
        # non-ASCII characters. Dropping its surrounding quotes gives text that
        # can be injected between the quotes of the template, so a password
        # such as 'a"b\\c' no longer produces an invalid JSON body.
        payload = SESSION_TOKEN_PAYLOAD.format(json.dumps(username)[1:-1], json.dumps(password)[1:-1])

        # Each session serves a single request: the context manager releases it.
        with Session() as session:
            session.headers.update({
                "domain": "grdf.fr",
                "Content-Type": "application/json",
                "X-Requested-With": "XMLHttpRequest",
            })
            response = session.post(SESSION_TOKEN_URL, data=payload)

        if response.status_code != 200:
            raise Exception(f"An error occurred while logging in. Status code: {response.status_code} - {response.text}")

        session_token = response.json().get("sessionToken")

        # NOTE(review): this writes a credential to the debug log.
        Logger.debug("Session token: %s", session_token)

        jar = http.cookiejar.CookieJar()

        params = json.loads(AUTH_TOKEN_PARAMS.format(session_token))

        with Session() as session:
            session.headers.update({
                "Content-Type": "application/json",
                "X-Requested-With": "XMLHttpRequest",
            })
            response = session.get(AUTH_TOKEN_URL, params=params, allow_redirects=True, cookies=jar)  # type: ignore

            if response.status_code != 200:
                raise Exception(f"An error occurred while getting the auth token. Status code: {response.status_code} - {response.text}")

            auth_token = session.cookies.get("auth_token", domain="monespace.grdf.fr")

        # Fail here rather than let the subclasses retry their downloads with no token.
        if auth_token is None:
            raise Exception("An error occurred while getting the auth token. The 'auth_token' cookie is missing.")

        return auth_token

    @abstractmethod
    def _loadFromSession(self, auth_token: str, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Retrieve the readings using `auth_token`; other arguments as in `load`."""
109
+
110
+
111
+ # ------------------------------------------------------------------------------------------------------------
112
class ExcelWebDataSource(WebDataSource):
    """Web data source that downloads one XLSX export per frequency and parses it."""

    DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives/telecharger?dateDebut={0}&dateFin={1}&frequence={3}&pceList[]={2}"

    DATE_FORMAT = "%Y-%m-%d"

    # Value of the "frequence" URL parameter. YEARLY requests the daily export:
    # the yearly figures are computed locally from it.
    FREQUENCY_VALUES = {
        Frequency.HOURLY: "Horaire",
        Frequency.DAILY: "Journalier",
        Frequency.WEEKLY: "Hebdomadaire",
        Frequency.MONTHLY: "Mensuel",
        Frequency.YEARLY: "Journalier"
    }

    # Glob pattern of the downloaded files inside the temporary directory.
    DATA_FILENAME = 'Donnees_informatives_*.xlsx'

    # ------------------------------------------------------
    def __init__(self, username: str, password: str, tmpDirectory: str):
        """Store the credentials and the directory receiving the downloads."""
        super().__init__(username, password)

        self.__tmpDirectory = tmpDirectory

    # ------------------------------------------------------
    def _loadFromSession(self, auth_token: str, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Download and parse the export of each requested frequency.

        Args:
            auth_token: value sent as the "auth_token" cookie.
            pceIdentifier: identifier injected in the download URL.
            startDate: start of the period, sent as "dateDebut".
            endDate: end of the period, sent as "dateFin".
            frequencies: frequencies to load; None loads every Frequency.

        Returns:
            The parsed readings keyed by `Frequency.value`. A frequency for
            which no file was found in the temporary directory has no entry.

        Raises:
            Exception: the last download error once the 10 attempts are used up.
        """
        res = {}

        # XLSX is in the TMP directory
        data_file_path_pattern = self.__tmpDirectory + '/' + ExcelWebDataSource.DATA_FILENAME

        # We remove an eventual existing data file (from a previous run that has not deleted it).
        for filename in glob.glob(data_file_path_pattern):
            if os.path.isfile(filename):
                try:
                    os.remove(filename)
                except PermissionError:
                    # Best effort only: the file may still be held open.
                    pass

        # None means every frequency; otherwise duplicates are dropped.
        frequencyList = list(Frequency) if frequencies is None else set(frequencies)

        startText = startDate.strftime(ExcelWebDataSource.DATE_FORMAT)
        endText = endDate.strftime(ExcelWebDataSource.DATE_FORMAT)

        for frequency in frequencyList:
            frequencyLabel = ExcelWebDataSource.FREQUENCY_VALUES[frequency]

            # Inject parameters.
            downloadUrl = ExcelWebDataSource.DATA_URL.format(startText, endText, pceIdentifier, frequencyLabel)

            Logger.debug(f"Loading data of frequency {frequencyLabel} from {startText} to {endText}")

            # Retry mechanism: up to 10 attempts, 3 seconds apart.
            retry = 10
            while retry > 0:

                # A fresh session per attempt, released whatever the outcome.
                with Session() as session:
                    session.headers.update({
                        "Host": "monespace.grdf.fr",
                        "Domain": "grdf.fr",
                        "X-Requested-With": "XMLHttpRequest",
                        "Accept": "application/json",
                    })
                    session.cookies.set("auth_token", auth_token, domain="monespace.grdf.fr")

                    try:
                        self.__downloadFile(session, downloadUrl, self.__tmpDirectory)
                        break
                    except Exception:

                        if retry == 1:
                            raise

                        Logger.error("An error occurred while loading data. Retry in 3 seconds.")

                time.sleep(3)
                retry -= 1

            # Load the XLSX file into the data structure
            file_list = glob.glob(data_file_path_pattern)

            if len(file_list) == 0:
                Logger.warning(f"Not any data file has been found in '{self.__tmpDirectory}' directory")

            for filename in file_list:
                res[frequency.value] = ExcelParser.parse(filename, frequency if frequency != Frequency.YEARLY else Frequency.DAILY)
                try:
                    # openpyxl does not close the file properly.
                    os.remove(filename)
                except PermissionError:
                    pass

            # We compute yearly from daily data. The membership test avoids a
            # KeyError when no file was found for this frequency.
            if frequency == Frequency.YEARLY and frequency.value in res:
                res[frequency.value] = FrequencyConverter.computeYearly(res[frequency.value])

        return res

    # ------------------------------------------------------
    def __downloadFile(self, session: Session, url: str, path: str):
        """Fetch `url` with `session` and save the body under directory `path`.

        The file name is taken from the "Content-Disposition" response header.

        Raises:
            Exception: on an HTML answer or on a status other than 200.
            KeyError: when the "Content-Disposition" header is missing.
        """
        response = session.get(url)

        # The default keeps the membership test valid when the header is absent.
        if "text/html" in response.headers.get("Content-Type", ""):
            raise Exception("An error occurred while loading data. Please check your credentials.")

        if response.status_code != 200:
            raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")

        filename = response.headers["Content-Disposition"].split("filename=")[1]

        # The context manager closes the file even if the write fails.
        with open(f"{path}/{filename}", "wb") as dataFile:
            dataFile.write(response.content)
225
+
226
+
227
+ # ------------------------------------------------------------------------------------------------------------
228
class ExcelFileDataSource(IDataSource):
    """Data source that parses a file already present on disk with ExcelParser."""

    def __init__(self, excelFile: str):
        """Remember the path handed to ExcelParser on each `load`."""
        self.__excelFile = excelFile

    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Parse the file once per requested frequency.

        `pceIdentifier`, `startDate` and `endDate` are not used by this source.
        `frequencies` set to None selects every Frequency.

        Returns:
            The parsed readings keyed by `Frequency.value`.
        """
        selection = list(Frequency) if frequencies is None else set(frequencies)

        readings = {}
        for frequency in selection:
            if frequency == Frequency.YEARLY:
                # Yearly figures are derived from the daily ones.
                dailyReadings = ExcelParser.parse(self.__excelFile, Frequency.DAILY)
                parsed = FrequencyConverter.computeYearly(dailyReadings)
            else:
                parsed = ExcelParser.parse(self.__excelFile, frequency)
            readings[frequency.value] = parsed

        return readings
253
+
254
+
255
+ # ------------------------------------------------------------------------------------------------------------
256
class JsonWebDataSource(WebDataSource):
    """Web data source that reads the JSON consumption and weather endpoints."""

    DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives?dateDebut={0}&dateFin={1}&pceList[]={2}"

    TEMPERATURES_URL = "https://monespace.grdf.fr/api/e-conso/pce/{0}/meteo?dateFinPeriode={1}&nbJours={2}"

    # Format of the dates injected in the URLs.
    INPUT_DATE_FORMAT = "%Y-%m-%d"

    OUTPUT_DATE_FORMAT = "%d/%m/%Y"

    def __init__(self, username: str, password: str):
        """Store the credentials through the base class."""
        super().__init__(username, password)

    def _loadFromSession(self, auth_token: str, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Download the consumption and weather data, then derive each frequency.

        Args:
            auth_token: value sent as the "auth_token" cookie.
            pceIdentifier: identifier injected in both URLs.
            startDate: start of the requested period.
            endDate: end of the requested period.
            frequencies: frequencies to compute; None computes every Frequency.

        Returns:
            The readings keyed by `Frequency.value`.

        Raises:
            Exception: the last download error once the 10 attempts are used up.
        """
        res = {}

        computeByFrequency = {
            Frequency.HOURLY: FrequencyConverter.computeHourly,
            Frequency.DAILY: FrequencyConverter.computeDaily,
            Frequency.WEEKLY: FrequencyConverter.computeWeekly,
            Frequency.MONTHLY: FrequencyConverter.computeMonthly,
            Frequency.YEARLY: FrequencyConverter.computeYearly
        }

        # Data URL: Inject parameters.
        downloadUrl = JsonWebDataSource.DATA_URL.format(startDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), pceIdentifier)

        # Retry mechanism: up to 10 attempts, 3 seconds apart.
        retry = 10
        while retry > 0:

            # Create a session.
            session = Session()
            session.headers.update({
                "Host": "monespace.grdf.fr",
                "Domain": "grdf.fr",
                "X-Requested-With": "XMLHttpRequest",
                "Accept": "application/json",
            })
            session.cookies.set("auth_token", auth_token, domain="monespace.grdf.fr")

            try:
                response = session.get(downloadUrl)

                # The default keeps the membership test valid when the header is absent.
                if "text/html" in response.headers.get("Content-Type", ""):
                    raise Exception("An error occurred while loading data. Please check your credentials.")

                if response.status_code != 200:
                    raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")

                break
            except Exception:

                # The session of a failed attempt is never reused: release it.
                session.close()

                if retry == 1:
                    raise

                Logger.error("An error occurred while loading data. Retry in 3 seconds.")
                time.sleep(3)
                retry -= 1

        data = response.text

        # Temperatures URL: Inject parameters. The weather period ends yesterday
        # at the latest and spans at most 730 days.
        today = date.today()
        temperaturesEndDate = today - timedelta(days=1) if endDate >= today else endDate
        days = min((temperaturesEndDate - startDate).days, 730)
        temperaturesUrl = JsonWebDataSource.TEMPERATURES_URL.format(pceIdentifier, temperaturesEndDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), days)

        # Get weather data with the session of the successful attempt, then release it.
        with session:
            temperatures = session.get(temperaturesUrl).text

        # Transform all the data into the target structure.
        daily = JsonParser.parse(data, temperatures, pceIdentifier)

        # None means every frequency; otherwise duplicates are dropped.
        frequencyList = list(Frequency) if frequencies is None else set(frequencies)

        for frequency in frequencyList:
            res[frequency.value] = computeByFrequency[frequency](daily)

        return res
340
+
341
+
342
+ # ------------------------------------------------------------------------------------------------------------
343
class JsonFileDataSource(IDataSource):
    """Data source that reads consumption and temperature data from two local JSON files."""

    def __init__(self, consumptionJsonFile: str, temperatureJsonFile: str):
        """Remember the paths of the files to read.

        Args:
            consumptionJsonFile: Path of the JSON file holding the consumption data.
            temperatureJsonFile: Path of the JSON file holding the temperature data.
        """

        self.__consumptionJsonFile = consumptionJsonFile
        self.__temperatureJsonFile = temperatureJsonFile

    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Parse both files and return the readings grouped by frequency.

        Args:
            pceIdentifier: Identifier of the PCE, forwarded to JsonParser.parse.
            startDate: Not used by this data source.
            endDate: Not used by this data source.
            frequencies: Frequencies to compute. None means every Frequency member.

        Returns:
            A dict mapping each requested frequency value to its list of readings.
        """

        res = {}

        # JSON text is UTF-8: do not depend on the platform default encoding.
        with open(self.__consumptionJsonFile, encoding="utf-8") as consumptionJsonFile, open(self.__temperatureJsonFile, encoding="utf-8") as temperatureJsonFile:
            daily = JsonParser.parse(consumptionJsonFile.read(), temperatureJsonFile.read(), pceIdentifier)

        computeByFrequency = {
            Frequency.HOURLY: FrequencyConverter.computeHourly,
            Frequency.DAILY: FrequencyConverter.computeDaily,
            Frequency.WEEKLY: FrequencyConverter.computeWeekly,
            Frequency.MONTHLY: FrequencyConverter.computeMonthly,
            Frequency.YEARLY: FrequencyConverter.computeYearly
        }

        if frequencies is None:
            # Transform Enum in List.
            frequencyList = list(Frequency)
        else:
            # Get unique values, keeping the order requested by the caller.
            frequencyList = list(dict.fromkeys(frequencies))

        for frequency in frequencyList:
            res[frequency.value] = computeByFrequency[frequency](daily)

        return res
377
+
378
+
379
+ # ------------------------------------------------------------------------------------------------------------
380
class TestDataSource(IDataSource):
    """Data source that returns the JSON samples stored in the "resources" directory next to this module."""

    def __init__(self):
        """Nothing to configure."""

        pass

    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Load one sample file per requested frequency.

        Args:
            pceIdentifier: Not used by this data source.
            startDate: Not used by this data source.
            endDate: Not used by this data source.
            frequencies: Frequencies to load. None means every Frequency member.

        Returns:
            A dict mapping each requested frequency value to the content of its sample file.
        """

        res = {}

        dataSampleFilenameByFrequency = {
            Frequency.HOURLY: "hourly_data_sample.json",
            Frequency.DAILY: "daily_data_sample.json",
            Frequency.WEEKLY: "weekly_data_sample.json",
            Frequency.MONTHLY: "monthly_data_sample.json",
            Frequency.YEARLY: "yearly_data_sample.json"
        }

        if frequencies is None:
            # Transform Enum in List.
            frequencyList = list(Frequency)
        else:
            # Get unique values, keeping the order requested by the caller.
            frequencyList = list(dict.fromkeys(frequencies))

        # The samples live in the "resources" directory next to this module.
        resourcesDirectory = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources")

        for frequency in frequencyList:
            dataSampleFilename = os.path.join(resourcesDirectory, dataSampleFilenameByFrequency[frequency])

            # JSON text is UTF-8: do not depend on the platform default encoding.
            with open(dataSampleFilename, encoding="utf-8") as jsonFile:
                res[frequency.value] = cast(List[Dict[PropertyName, Any]], json.load(jsonFile))

        return res
412
+
413
+
414
+ # ------------------------------------------------------------------------------------------------------------
415
class FrequencyConverter:
    """Derive hourly, weekly, monthly and yearly readings from daily readings.

    Every aggregating method expects each daily reading to provide the keys
    "time_period" (a date formatted as JsonWebDataSource.OUTPUT_DATE_FORMAT),
    "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh" and "timestamp".
    An empty list of daily readings always yields an empty result.
    """

    # Month names used to label the monthly periods.
    MONTHS = [
        "Janvier",
        "Février",
        "Mars",
        "Avril",
        "Mai",
        "Juin",
        "Juillet",
        "Août",
        "Septembre",
        "Octobre",
        "Novembre",
        "Décembre"
    ]

    # Aggregations shared by every period: output column -> (input column, function).
    # "count" is the number of daily readings having an energy value in the period.
    _AGGREGATIONS = {
        "start_index_m3": ("start_index_m3", "min"),
        "end_index_m3": ("end_index_m3", "max"),
        "volume_m3": ("volume_m3", "sum"),
        "energy_kwh": ("energy_kwh", "sum"),
        "timestamp": ("timestamp", "min"),
        "count": ("energy_kwh", "count")
    }

    # Columns of the readings returned by the aggregating methods.
    _TARGET_COLUMNS = ["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]

    # ------------------------------------------------------
    @staticmethod
    def computeHourly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return no reading: hourly data cannot be derived from daily data."""

        return []

    # ------------------------------------------------------
    @staticmethod
    def computeDaily(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return the daily readings unchanged (the very same list object)."""

        return daily

    # ------------------------------------------------------
    @staticmethod
    def computeWeekly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by week (Monday to Sunday).

        Weeks with fewer than 7 readings are dropped, except the most recent one.
        The period label is "Du <first day> au <last day>".
        """

        if not daily:
            return []

        df = FrequencyConverter._toDataFrame(daily)

        # Get the first day of week (%W counts weeks starting on Monday, %w=1 is Monday).
        df["first_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 1"), format="%W %Y %w")

        # Get the last day of week (%w=0 is Sunday).
        df["last_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 0"), format="%W %Y %w")

        # Reformat the time period.
        dateFormat = JsonWebDataSource.OUTPUT_DATE_FORMAT
        df["time_period"] = "Du " + df["first_day_of_week"].dt.strftime(dateFormat).astype(str) + " au " + df["last_day_of_week"].dt.strftime(dateFormat).astype(str)

        # Aggregate rows by week.
        df = df.groupby("time_period").agg(first_day_of_week=("first_day_of_week", "min"), **FrequencyConverter._AGGREGATIONS).reset_index()

        # Sort rows by week ascending.
        df = df.sort_values(by=["first_day_of_week"])

        # Select rows where we have a full week (7 days) except for the current week.
        df = FrequencyConverter._keepCompletePeriods(df, 7)

        return FrequencyConverter._toRecords(df)

    # ------------------------------------------------------
    @staticmethod
    def computeMonthly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by month.

        Months with fewer than 28 readings are dropped, except the most recent one.
        The period label is "<month name from MONTHS> <year>".
        """

        if not daily:
            return []

        df = FrequencyConverter._toDataFrame(daily)

        # Get the corresponding month-year.
        df["month_year"] = df["date_time"].apply(lambda x: FrequencyConverter.MONTHS[x.month - 1]).astype(str) + " " + df["date_time"].dt.strftime("%Y").astype(str)

        # Aggregate rows by month_year.
        df = df.groupby("month_year").agg(first_day_of_month=("date_time", "min"), **FrequencyConverter._AGGREGATIONS).reset_index()

        # Sort rows by month ascending.
        df = df.sort_values(by=["first_day_of_month"])

        # Select rows where we have a full month (more than 27 days) except for the current month.
        df = FrequencyConverter._keepCompletePeriods(df, 28)

        # Rename columns for their target names.
        df = df.rename(columns={"month_year": "time_period"})

        return FrequencyConverter._toRecords(df)

    # ------------------------------------------------------
    @staticmethod
    def computeYearly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by year.

        Years with fewer than 360 readings are dropped, except the most recent one.
        The period label is the 4-digit year.
        """

        if not daily:
            return []

        df = FrequencyConverter._toDataFrame(daily)

        # Get the corresponding year.
        df["year"] = df["date_time"].dt.strftime("%Y")

        # Aggregate rows by year.
        df = df.groupby("year").agg(**FrequencyConverter._AGGREGATIONS).reset_index()

        # Sort rows by year ascending.
        df = df.sort_values(by=["year"])

        # Select rows where we have almost a full year (more than 360) except for the current year.
        df = FrequencyConverter._keepCompletePeriods(df, 360)

        # Rename columns for their target names.
        df = df.rename(columns={"year": "time_period"})

        return FrequencyConverter._toRecords(df)

    # ------------------------------------------------------
    @staticmethod
    def _toDataFrame(daily: List[Dict[str, Any]]) -> pd.DataFrame:
        """Load the daily readings into a DataFrame with a parsed "date_time" column."""

        df = pd.DataFrame(daily)

        # Trimming head and trailing spaces and convert to datetime.
        df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)

        return df

    # ------------------------------------------------------
    @staticmethod
    def _keepCompletePeriods(df: pd.DataFrame, minCount: int) -> pd.DataFrame:
        """Keep the rows whose "count" is at least minCount, plus the last row whatever its count."""

        if df.empty:
            return df

        # Build the mask on the frame itself so that its length always matches the rows it filters.
        keep = (df["count"] >= minCount).to_numpy().copy()
        keep[-1] = True

        return df[keep]

    # ------------------------------------------------------
    @staticmethod
    def _toRecords(df: pd.DataFrame) -> List[Dict[str, Any]]:
        """Select the target columns and convert the rows to a list of dicts."""

        return cast(List[Dict[str, Any]], df[FrequencyConverter._TARGET_COLUMNS].to_dict('records'))