pygazpar 1.2.8__py311-none-any.whl → 1.3.0a6__py311-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
pygazpar/datasource.py CHANGED
@@ -1,528 +1,535 @@
1
- import logging
2
- import glob
3
- import os
4
- import json
5
- import time
6
- import pandas as pd
7
- import http.cookiejar
8
- from abc import ABC, abstractmethod
9
- from typing import Any, List, Dict, cast, Optional
10
- from requests import Session
11
- from datetime import date, timedelta
12
- from pygazpar.enum import Frequency, PropertyName
13
- from pygazpar.excelparser import ExcelParser
14
- from pygazpar.jsonparser import JsonParser
15
-
16
- SESSION_TOKEN_URL = "https://connexion.grdf.fr/api/v1/authn"
17
- SESSION_TOKEN_PAYLOAD = """{{
18
- "username": "{0}",
19
- "password": "{1}",
20
- "options": {{
21
- "multiOptionalFactorEnroll": "false",
22
- "warnBeforePasswordExpired": "false"
23
- }}
24
- }}"""
25
-
26
- AUTH_TOKEN_URL = "https://connexion.grdf.fr/login/sessionCookieRedirect"
27
- AUTH_TOKEN_PARAMS = """{{
28
- "checkAccountSetupComplete": "true",
29
- "token": "{0}",
30
- "redirectUrl": "https://monespace.grdf.fr"
31
- }}"""
32
-
33
- Logger = logging.getLogger(__name__)
34
-
35
- MeterReading = Dict[str, Any]
36
-
37
- MeterReadings = List[MeterReading]
38
-
39
- MeterReadingsByFrequency = Dict[str, MeterReadings]
40
-
41
-
42
- # ------------------------------------------------------------------------------------------------------------
43
- class IDataSource(ABC):
44
-
45
- @abstractmethod
46
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
47
- pass
48
-
49
-
50
- # ------------------------------------------------------------------------------------------------------------
51
- class WebDataSource(IDataSource):
52
-
53
- # ------------------------------------------------------
54
- def __init__(self, username: str, password: str):
55
-
56
- self.__username = username
57
- self.__password = password
58
-
59
- # ------------------------------------------------------
60
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
61
-
62
- self._login(self.__username, self.__password) # We ignore the return value.
63
-
64
- res = self._loadFromSession(pceIdentifier, startDate, endDate, frequencies)
65
-
66
- Logger.debug("The data update terminates normally")
67
-
68
- return res
69
-
70
- # ------------------------------------------------------
71
- def _login(self, username: str, password: str) -> str:
72
-
73
- session = Session()
74
- session.headers.update({"domain": "grdf.fr"})
75
- session.headers.update({"Content-Type": "application/json"})
76
- session.headers.update({"X-Requested-With": "XMLHttpRequest"})
77
-
78
- payload = SESSION_TOKEN_PAYLOAD.format(username, password)
79
-
80
- response = session.post(SESSION_TOKEN_URL, data=payload)
81
-
82
- if response.status_code != 200:
83
- raise Exception(f"An error occurred while logging in. Status code: {response.status_code} - {response.text}")
84
-
85
- session_token = response.json().get("sessionToken")
86
-
87
- Logger.debug("Session token: %s", session_token)
88
-
89
- jar = http.cookiejar.CookieJar()
90
-
91
- self._session = Session()
92
- self._session.headers.update({"Content-Type": "application/json"})
93
- self._session.headers.update({"X-Requested-With": "XMLHttpRequest"})
94
-
95
- params = json.loads(AUTH_TOKEN_PARAMS.format(session_token))
96
-
97
- response = self._session.get(AUTH_TOKEN_URL, params=params, allow_redirects=True, cookies=jar) # type: ignore
98
-
99
- if response.status_code != 200:
100
- raise Exception(f"An error occurred while getting the auth token. Status code: {response.status_code} - {response.text}")
101
-
102
- auth_token = self._session.cookies.get("auth_token", domain="monespace.grdf.fr")
103
-
104
- return auth_token # type: ignore
105
-
106
- @abstractmethod
107
- def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
108
- pass
109
-
110
-
111
- # ------------------------------------------------------------------------------------------------------------
112
- class ExcelWebDataSource(WebDataSource):
113
-
114
- DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives/telecharger?dateDebut={0}&dateFin={1}&frequence={3}&pceList[]={2}"
115
-
116
- DATE_FORMAT = "%Y-%m-%d"
117
-
118
- FREQUENCY_VALUES = {
119
- Frequency.HOURLY: "Horaire",
120
- Frequency.DAILY: "Journalier",
121
- Frequency.WEEKLY: "Hebdomadaire",
122
- Frequency.MONTHLY: "Mensuel",
123
- Frequency.YEARLY: "Journalier"
124
- }
125
-
126
- DATA_FILENAME = 'Donnees_informatives_*.xlsx'
127
-
128
- # ------------------------------------------------------
129
- def __init__(self, username: str, password: str, tmpDirectory: str):
130
-
131
- super().__init__(username, password)
132
-
133
- self.__tmpDirectory = tmpDirectory
134
-
135
- # ------------------------------------------------------
136
- def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
137
-
138
- res = {}
139
-
140
- # XLSX is in the TMP directory
141
- data_file_path_pattern = self.__tmpDirectory + '/' + ExcelWebDataSource.DATA_FILENAME
142
-
143
- # We remove an eventual existing data file (from a previous run that has not deleted it).
144
- file_list = glob.glob(data_file_path_pattern)
145
- for filename in file_list:
146
- if os.path.isfile(filename):
147
- try:
148
- os.remove(filename)
149
- except PermissionError:
150
- pass
151
-
152
- if frequencies is None:
153
- # Transform Enum in List.
154
- frequencyList = [frequency for frequency in Frequency]
155
- else:
156
- # Get unique values.
157
- frequencyList = set(frequencies)
158
-
159
- for frequency in frequencyList:
160
- # Inject parameters.
161
- downloadUrl = ExcelWebDataSource.DATA_URL.format(startDate.strftime(ExcelWebDataSource.DATE_FORMAT), endDate.strftime(ExcelWebDataSource.DATE_FORMAT), pceIdentifier, ExcelWebDataSource.FREQUENCY_VALUES[frequency])
162
-
163
- Logger.debug(f"Loading data of frequency {ExcelWebDataSource.FREQUENCY_VALUES[frequency]} from {startDate.strftime(ExcelWebDataSource.DATE_FORMAT)} to {endDate.strftime(ExcelWebDataSource.DATE_FORMAT)}")
164
-
165
- # Retry mechanism.
166
- retry = 10
167
- while retry > 0:
168
-
169
- try:
170
- self.__downloadFile(self._session, downloadUrl, self.__tmpDirectory)
171
- break
172
- except Exception as e:
173
-
174
- if retry == 1:
175
- raise e
176
-
177
- Logger.error("An error occurred while loading data. Retry in 3 seconds.")
178
- time.sleep(3)
179
- retry -= 1
180
-
181
- # Load the XLSX file into the data structure
182
- file_list = glob.glob(data_file_path_pattern)
183
-
184
- if len(file_list) == 0:
185
- Logger.warning(f"Not any data file has been found in '{self.__tmpDirectory}' directory")
186
-
187
- for filename in file_list:
188
- res[frequency.value] = ExcelParser.parse(filename, frequency if frequency != Frequency.YEARLY else Frequency.DAILY)
189
- try:
190
- # openpyxl does not close the file properly.
191
- os.remove(filename)
192
- except PermissionError:
193
- pass
194
-
195
- # We compute yearly from daily data.
196
- if frequency == Frequency.YEARLY:
197
- res[frequency.value] = FrequencyConverter.computeYearly(res[frequency.value])
198
-
199
- return res
200
-
201
- # ------------------------------------------------------
202
- def __downloadFile(self, session: Session, url: str, path: str):
203
-
204
- response = session.get(url)
205
-
206
- if "text/html" in response.headers.get("Content-Type"): # type: ignore
207
- raise Exception("An error occurred while loading data. Please check your credentials.")
208
-
209
- if response.status_code != 200:
210
- raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")
211
-
212
- response.raise_for_status()
213
-
214
- filename = response.headers["Content-Disposition"].split("filename=")[1]
215
-
216
- open(f"{path}/{filename}", "wb").write(response.content)
217
-
218
-
219
- # ------------------------------------------------------------------------------------------------------------
220
- class ExcelFileDataSource(IDataSource):
221
-
222
- def __init__(self, excelFile: str):
223
-
224
- self.__excelFile = excelFile
225
-
226
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
227
-
228
- res = {}
229
-
230
- if frequencies is None:
231
- # Transform Enum in List.
232
- frequencyList = [frequency for frequency in Frequency]
233
- else:
234
- # Get unique values.
235
- frequencyList = set(frequencies)
236
-
237
- for frequency in frequencyList:
238
- if frequency != Frequency.YEARLY:
239
- res[frequency.value] = ExcelParser.parse(self.__excelFile, frequency)
240
- else:
241
- daily = ExcelParser.parse(self.__excelFile, Frequency.DAILY)
242
- res[frequency.value] = FrequencyConverter.computeYearly(daily)
243
-
244
- return res
245
-
246
-
247
- # ------------------------------------------------------------------------------------------------------------
248
- class JsonWebDataSource(WebDataSource):
249
-
250
- DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives?dateDebut={0}&dateFin={1}&pceList[]={2}"
251
-
252
- TEMPERATURES_URL = "https://monespace.grdf.fr/api/e-conso/pce/{0}/meteo?dateFinPeriode={1}&nbJours={2}"
253
-
254
- INPUT_DATE_FORMAT = "%Y-%m-%d"
255
-
256
- OUTPUT_DATE_FORMAT = "%d/%m/%Y"
257
-
258
- def __init__(self, username: str, password: str):
259
-
260
- super().__init__(username, password)
261
-
262
- def _loadFromSession(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
263
-
264
- res = {}
265
-
266
- computeByFrequency = {
267
- Frequency.HOURLY: FrequencyConverter.computeHourly,
268
- Frequency.DAILY: FrequencyConverter.computeDaily,
269
- Frequency.WEEKLY: FrequencyConverter.computeWeekly,
270
- Frequency.MONTHLY: FrequencyConverter.computeMonthly,
271
- Frequency.YEARLY: FrequencyConverter.computeYearly
272
- }
273
-
274
- # Data URL: Inject parameters.
275
- downloadUrl = JsonWebDataSource.DATA_URL.format(startDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), pceIdentifier)
276
-
277
- # Retry mechanism.
278
- retry = 10
279
- while retry > 0:
280
-
281
- try:
282
- response = self._session.get(downloadUrl)
283
-
284
- if "text/html" in response.headers.get("Content-Type"): # type: ignore
285
- raise Exception("An error occurred while loading data. Please check your credentials.")
286
-
287
- if response.status_code != 200:
288
- raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")
289
-
290
- break
291
- except Exception as e:
292
-
293
- if retry == 1:
294
- raise e
295
-
296
- Logger.error("An error occurred while loading data. Retry in 3 seconds.")
297
- time.sleep(3)
298
- retry -= 1
299
-
300
- data = response.text
301
-
302
- Logger.debug("Json meter data: %s", data)
303
-
304
- # Temperatures URL: Inject parameters.
305
- endDate = date.today() - timedelta(days=1) if endDate >= date.today() else endDate
306
- days = min((endDate - startDate).days, 730)
307
- temperaturesUrl = JsonWebDataSource.TEMPERATURES_URL.format(pceIdentifier, endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), days)
308
-
309
- # Get weather data.
310
- temperatures = self._session.get(temperaturesUrl).text
311
-
312
- Logger.debug("Json temperature data: %s", temperatures)
313
-
314
- # Transform all the data into the target structure.
315
- daily = JsonParser.parse(data, temperatures, pceIdentifier)
316
-
317
- Logger.debug("Processed daily data: %s", daily)
318
-
319
- if frequencies is None:
320
- # Transform Enum in List.
321
- frequencyList = [frequency for frequency in Frequency]
322
- else:
323
- # Get unique values.
324
- frequencyList = set(frequencies)
325
-
326
- for frequency in frequencyList:
327
- res[frequency.value] = computeByFrequency[frequency](daily)
328
-
329
- return res
330
-
331
-
332
- # ------------------------------------------------------------------------------------------------------------
333
- class JsonFileDataSource(IDataSource):
334
-
335
- def __init__(self, consumptionJsonFile: str, temperatureJsonFile):
336
-
337
- self.__consumptionJsonFile = consumptionJsonFile
338
- self.__temperatureJsonFile = temperatureJsonFile
339
-
340
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
341
-
342
- res = {}
343
-
344
- with open(self.__consumptionJsonFile) as consumptionJsonFile:
345
- with open(self.__temperatureJsonFile) as temperatureJsonFile:
346
- daily = JsonParser.parse(consumptionJsonFile.read(), temperatureJsonFile.read(), pceIdentifier)
347
-
348
- computeByFrequency = {
349
- Frequency.HOURLY: FrequencyConverter.computeHourly,
350
- Frequency.DAILY: FrequencyConverter.computeDaily,
351
- Frequency.WEEKLY: FrequencyConverter.computeWeekly,
352
- Frequency.MONTHLY: FrequencyConverter.computeMonthly,
353
- Frequency.YEARLY: FrequencyConverter.computeYearly
354
- }
355
-
356
- if frequencies is None:
357
- # Transform Enum in List.
358
- frequencyList = [frequency for frequency in Frequency]
359
- else:
360
- # Get unique values.
361
- frequencyList = set(frequencies)
362
-
363
- for frequency in frequencyList:
364
- res[frequency.value] = computeByFrequency[frequency](daily)
365
-
366
- return res
367
-
368
-
369
- # ------------------------------------------------------------------------------------------------------------
370
- class TestDataSource(IDataSource):
371
-
372
- def __init__(self):
373
-
374
- pass
375
-
376
- def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
377
-
378
- res = {}
379
-
380
- dataSampleFilenameByFrequency = {
381
- Frequency.HOURLY: "hourly_data_sample.json",
382
- Frequency.DAILY: "daily_data_sample.json",
383
- Frequency.WEEKLY: "weekly_data_sample.json",
384
- Frequency.MONTHLY: "monthly_data_sample.json",
385
- Frequency.YEARLY: "yearly_data_sample.json"
386
- }
387
-
388
- if frequencies is None:
389
- # Transform Enum in List.
390
- frequencyList = [frequency for frequency in Frequency]
391
- else:
392
- # Get unique values.
393
- frequencyList = set(frequencies)
394
-
395
- for frequency in frequencyList:
396
- dataSampleFilename = f"{os.path.dirname(os.path.abspath(__file__))}/resources/{dataSampleFilenameByFrequency[frequency]}"
397
-
398
- with open(dataSampleFilename) as jsonFile:
399
- res[frequency.value] = cast(List[Dict[PropertyName, Any]], json.load(jsonFile))
400
-
401
- return res
402
-
403
-
404
- # ------------------------------------------------------------------------------------------------------------
405
- class FrequencyConverter:
406
-
407
- MONTHS = [
408
- "Janvier",
409
- "Février",
410
- "Mars",
411
- "Avril",
412
- "Mai",
413
- "Juin",
414
- "Juillet",
415
- "Août",
416
- "Septembre",
417
- "Octobre",
418
- "Novembre",
419
- "Décembre"
420
- ]
421
-
422
- # ------------------------------------------------------
423
- @staticmethod
424
- def computeHourly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
425
-
426
- return []
427
-
428
- # ------------------------------------------------------
429
- @staticmethod
430
- def computeDaily(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
431
-
432
- return daily
433
-
434
- # ------------------------------------------------------
435
- @staticmethod
436
- def computeWeekly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
437
-
438
- df = pd.DataFrame(daily)
439
-
440
- # Trimming head and trailing spaces and convert to datetime.
441
- df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)
442
-
443
- # Get the first day of week.
444
- df["first_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 1"), format="%W %Y %w")
445
-
446
- # Get the last day of week.
447
- df["last_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 0"), format="%W %Y %w")
448
-
449
- # Reformat the time period.
450
- df["time_period"] = "Du " + df["first_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str) + " au " + df["last_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str)
451
-
452
- # Aggregate rows by month_year.
453
- df = df[["first_day_of_week", "time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("time_period").agg(first_day_of_week=('first_day_of_week', 'min'), start_index_m3=('start_index_m3', 'min'), end_index_m3=('end_index_m3', 'max'), volume_m3=('volume_m3', 'sum'), energy_kwh=('energy_kwh', 'sum'), timestamp=('timestamp', 'min'), count=('energy_kwh', 'count')).reset_index()
454
-
455
- # Sort rows by month ascending.
456
- df = df.sort_values(by=['first_day_of_week'])
457
-
458
- # Select rows where we have a full week (7 days) except for the current week.
459
- df = pd.concat([df[(df["count"] >= 7)], df.tail(1)[df.tail(1)["count"] < 7]])
460
-
461
- # Select target columns.
462
- df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]
463
-
464
- res = cast(List[Dict[str, Any]], df.to_dict('records'))
465
-
466
- return res
467
-
468
- # ------------------------------------------------------
469
- @staticmethod
470
- def computeMonthly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
471
-
472
- df = pd.DataFrame(daily)
473
-
474
- # Trimming head and trailing spaces and convert to datetime.
475
- df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)
476
-
477
- # Get the corresponding month-year.
478
- df["month_year"] = df["date_time"].apply(lambda x: FrequencyConverter.MONTHS[x.month - 1]).astype(str) + " " + df["date_time"].dt.strftime("%Y").astype(str)
479
-
480
- # Aggregate rows by month_year.
481
- df = df[["date_time", "month_year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("month_year").agg(first_day_of_month=('date_time', 'min'), start_index_m3=('start_index_m3', 'min'), end_index_m3=('end_index_m3', 'max'), volume_m3=('volume_m3', 'sum'), energy_kwh=('energy_kwh', 'sum'), timestamp=('timestamp', 'min'), count=('energy_kwh', 'count')).reset_index()
482
-
483
- # Sort rows by month ascending.
484
- df = df.sort_values(by=['first_day_of_month'])
485
-
486
- # Select rows where we have a full month (more than 27 days) except for the current month.
487
- df = pd.concat([df[(df["count"] >= 28)], df.tail(1)[df.tail(1)["count"] < 28]])
488
-
489
- # Rename columns for their target names.
490
- df = df.rename(columns={"month_year": "time_period"})
491
-
492
- # Select target columns.
493
- df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]
494
-
495
- res = cast(List[Dict[str, Any]], df.to_dict('records'))
496
-
497
- return res
498
-
499
- # ------------------------------------------------------
500
- @staticmethod
501
- def computeYearly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
502
-
503
- df = pd.DataFrame(daily)
504
-
505
- # Trimming head and trailing spaces and convert to datetime.
506
- df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)
507
-
508
- # Get the corresponding year.
509
- df["year"] = df["date_time"].dt.strftime("%Y")
510
-
511
- # Aggregate rows by month_year.
512
- df = df[["year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("year").agg(start_index_m3=('start_index_m3', 'min'), end_index_m3=('end_index_m3', 'max'), volume_m3=('volume_m3', 'sum'), energy_kwh=('energy_kwh', 'sum'), timestamp=('timestamp', 'min'), count=('energy_kwh', 'count')).reset_index()
513
-
514
- # Sort rows by month ascending.
515
- df = df.sort_values(by=['year'])
516
-
517
- # Select rows where we have almost a full year (more than 360) except for the current year.
518
- df = pd.concat([df[(df["count"] >= 360)], df.tail(1)[df.tail(1)["count"] < 360]])
519
-
520
- # Rename columns for their target names.
521
- df = df.rename(columns={"year": "time_period"})
522
-
523
- # Select target columns.
524
- df = df[["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]]
525
-
526
- res = cast(List[Dict[str, Any]], df.to_dict('records'))
527
-
528
- return res
1
+ import logging
2
+ import glob
3
+ import os
4
+ import json
5
+ import time
6
+ import pandas as pd
7
+ import http.cookiejar
8
+ from abc import ABC, abstractmethod
9
+ from typing import Any, List, Dict, cast, Optional
10
+ from requests import Session
11
+ from datetime import date, timedelta
12
+ from pygazpar.enum import Frequency, PropertyName
13
+ from pygazpar.excelparser import ExcelParser
14
+ from pygazpar.jsonparser import JsonParser
15
+
16
+ SESSION_TOKEN_URL = "https://connexion.grdf.fr/api/v1/authn"
17
+ SESSION_TOKEN_PAYLOAD = """{{
18
+ "username": "{0}",
19
+ "password": "{1}",
20
+ "options": {{
21
+ "multiOptionalFactorEnroll": "false",
22
+ "warnBeforePasswordExpired": "false"
23
+ }}
24
+ }}"""
25
+
26
+ AUTH_TOKEN_URL = "https://connexion.grdf.fr/login/sessionCookieRedirect"
27
+ AUTH_TOKEN_PARAMS = """{{
28
+ "checkAccountSetupComplete": "true",
29
+ "token": "{0}",
30
+ "redirectUrl": "https://monespace.grdf.fr"
31
+ }}"""
32
+
33
+ Logger = logging.getLogger(__name__)
34
+
35
+ MeterReading = Dict[str, Any]
36
+
37
+ MeterReadings = List[MeterReading]
38
+
39
+ MeterReadingsByFrequency = Dict[str, MeterReadings]
40
+
41
+
42
+ # ------------------------------------------------------------------------------------------------------------
43
+ class IDataSource(ABC):
44
+
45
+ @abstractmethod
46
+ def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
47
+ pass
48
+
49
+
50
+ # ------------------------------------------------------------------------------------------------------------
51
+ class WebDataSource(IDataSource):
52
+
53
+ # ------------------------------------------------------
54
+ def __init__(self, username: str, password: str):
55
+
56
+ self.__username = username
57
+ self.__password = password
58
+
59
+ # ------------------------------------------------------
60
+ def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
61
+
62
+ auth_token = self._login(self.__username, self.__password)
63
+
64
+ res = self._loadFromSession(auth_token, pceIdentifier, startDate, endDate, frequencies)
65
+
66
+ Logger.debug("The data update terminates normally")
67
+
68
+ return res
69
+
70
+ # ------------------------------------------------------
71
+ def _login(self, username: str, password: str) -> str:
72
+
73
+ session = Session()
74
+ session.headers.update({"domain": "grdf.fr"})
75
+ session.headers.update({"Content-Type": "application/json"})
76
+ session.headers.update({"X-Requested-With": "XMLHttpRequest"})
77
+
78
+ payload = SESSION_TOKEN_PAYLOAD.format(username, password)
79
+
80
+ response = session.post(SESSION_TOKEN_URL, data=payload)
81
+
82
+ if response.status_code != 200:
83
+ raise Exception(f"An error occurred while logging in. Status code: {response.status_code} - {response.text}")
84
+
85
+ session_token = response.json().get("sessionToken")
86
+
87
+ Logger.debug("Session token: %s", session_token)
88
+
89
+ jar = http.cookiejar.CookieJar()
90
+
91
+ session = Session()
92
+ session.headers.update({"Content-Type": "application/json"})
93
+ session.headers.update({"X-Requested-With": "XMLHttpRequest"})
94
+
95
+ params = json.loads(AUTH_TOKEN_PARAMS.format(session_token))
96
+
97
+ response = session.get(AUTH_TOKEN_URL, params=params, allow_redirects=True, cookies=jar)
98
+
99
+ if response.status_code != 200:
100
+ raise Exception(f"An error occurred while getting the auth token. Status code: {response.status_code} - {response.text}")
101
+
102
+ auth_token = session.cookies.get("auth_token", domain="monespace.grdf.fr")
103
+
104
+ return auth_token
105
+
106
+ @abstractmethod
107
+ def _loadFromSession(self, auth_token: str, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
108
+ pass
109
+
110
+
111
+ # ------------------------------------------------------------------------------------------------------------
112
+ class ExcelWebDataSource(WebDataSource):
113
+
114
+ DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives/telecharger?dateDebut={0}&dateFin={1}&frequence={3}&pceList[]={2}"
115
+
116
+ DATE_FORMAT = "%Y-%m-%d"
117
+
118
+ FREQUENCY_VALUES = {
119
+ Frequency.HOURLY: "Horaire",
120
+ Frequency.DAILY: "Journalier",
121
+ Frequency.WEEKLY: "Hebdomadaire",
122
+ Frequency.MONTHLY: "Mensuel",
123
+ Frequency.YEARLY: "Journalier"
124
+ }
125
+
126
+ DATA_FILENAME = 'Donnees_informatives_*.xlsx'
127
+
128
+ # ------------------------------------------------------
129
+ def __init__(self, username: str, password: str, tmpDirectory: str):
130
+
131
+ super().__init__(username, password)
132
+
133
+ self.__tmpDirectory = tmpDirectory
134
+
135
+ # ------------------------------------------------------
136
+ def _loadFromSession(self, auth_token: str, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
137
+
138
+ res = {}
139
+
140
+ # XLSX is in the TMP directory
141
+ data_file_path_pattern = self.__tmpDirectory + '/' + ExcelWebDataSource.DATA_FILENAME
142
+
143
+ # We remove an eventual existing data file (from a previous run that has not deleted it).
144
+ file_list = glob.glob(data_file_path_pattern)
145
+ for filename in file_list:
146
+ if os.path.isfile(filename):
147
+ os.remove(filename)
148
+
149
+ if frequencies is None:
150
+ # Transform Enum in List.
151
+ frequencyList = [frequency for frequency in Frequency]
152
+ else:
153
+ # Get unique values.
154
+ frequencyList = set(frequencies)
155
+
156
+ for frequency in frequencyList:
157
+ # Inject parameters.
158
+ downloadUrl = ExcelWebDataSource.DATA_URL.format(startDate.strftime(ExcelWebDataSource.DATE_FORMAT), endDate.strftime(ExcelWebDataSource.DATE_FORMAT), pceIdentifier, ExcelWebDataSource.FREQUENCY_VALUES[frequency])
159
+
160
+ Logger.debug(f"Loading data of frequency {ExcelWebDataSource.FREQUENCY_VALUES[frequency]} from {startDate.strftime(ExcelWebDataSource.DATE_FORMAT)} to {endDate.strftime(ExcelWebDataSource.DATE_FORMAT)}")
161
+
162
+ # Retry mechanism.
163
+ retry = 10
164
+ while retry > 0:
165
+
166
+ # Create a session.
167
+ session = Session()
168
+ session.headers.update({"Host": "monespace.grdf.fr"})
169
+ session.headers.update({"Domain": "grdf.fr"})
170
+ session.headers.update({"X-Requested-With": "XMLHttpRequest"})
171
+ session.headers.update({"Accept": "application/json"})
172
+ session.cookies.set("auth_token", auth_token, domain="monespace.grdf.fr")
173
+
174
+ try:
175
+ self.__downloadFile(session, downloadUrl, self.__tmpDirectory)
176
+ break
177
+ except Exception as e:
178
+
179
+ if retry == 1:
180
+ raise e
181
+
182
+ Logger.error("An error occurred while loading data. Retry in 3 seconds.")
183
+ time.sleep(3)
184
+ retry -= 1
185
+
186
+ # Load the XLSX file into the data structure
187
+ file_list = glob.glob(data_file_path_pattern)
188
+
189
+ if len(file_list) == 0:
190
+ Logger.warning(f"Not any data file has been found in '{self.__tmpDirectory}' directory")
191
+
192
+ for filename in file_list:
193
+ res[frequency.value] = ExcelParser.parse(filename, frequency if frequency != Frequency.YEARLY else Frequency.DAILY)
194
+ try:
195
+ # openpyxl does not close the file properly.
196
+ os.remove(filename)
197
+ except Exception:
198
+ pass
199
+
200
+ # We compute yearly from daily data.
201
+ if frequency == Frequency.YEARLY:
202
+ res[frequency.value] = FrequencyConverter.computeYearly(res[frequency.value])
203
+
204
+ return res
205
+
206
+ # ------------------------------------------------------
207
+ def __downloadFile(self, session: Session, url: str, path: str):
208
+
209
+ response = session.get(url)
210
+
211
+ if "text/html" in response.headers.get("Content-Type"):
212
+ raise Exception("An error occurred while loading data. Please check your credentials.")
213
+
214
+ if response.status_code != 200:
215
+ raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")
216
+
217
+ response.raise_for_status()
218
+
219
+ filename = response.headers["Content-Disposition"].split("filename=")[1]
220
+
221
+ open(f"{path}/{filename}", "wb").write(response.content)
222
+
223
+
224
+ # ------------------------------------------------------------------------------------------------------------
225
+ class ExcelFileDataSource(IDataSource):
226
+
227
+ def __init__(self, excelFile: str):
228
+
229
+ self.__excelFile = excelFile
230
+
231
+ def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
232
+
233
+ res = {}
234
+
235
+ if frequencies is None:
236
+ # Transform Enum in List.
237
+ frequencyList = [frequency for frequency in Frequency]
238
+ else:
239
+ # Get unique values.
240
+ frequencyList = set(frequencies)
241
+
242
+ for frequency in frequencyList:
243
+ if frequency != Frequency.YEARLY:
244
+ res[frequency.value] = ExcelParser.parse(self.__excelFile, frequency)
245
+ else:
246
+ daily = ExcelParser.parse(self.__excelFile, Frequency.DAILY)
247
+ res[frequency.value] = FrequencyConverter.computeYearly(daily)
248
+
249
+ return res
250
+
251
+
252
+ # ------------------------------------------------------------------------------------------------------------
253
class JsonWebDataSource(WebDataSource):
    """Data source that downloads readings from the GrDF web site JSON endpoints.

    The daily consumption and the matching temperatures are fetched over HTTP,
    handed to ``JsonParser.parse`` and the resulting daily readings are then
    converted to every requested frequency with ``FrequencyConverter``.
    """

    # Consumption endpoint: {0}=start date, {1}=end date, {2}=PCE identifier.
    DATA_URL = "https://monespace.grdf.fr/api/e-conso/pce/consommation/informatives?dateDebut={0}&dateFin={1}&pceList[]={2}"

    # Weather endpoint: {0}=PCE identifier, {1}=end date, {2}=number of days.
    TEMPERATURES_URL = "https://monespace.grdf.fr/api/e-conso/pce/{0}/meteo?dateFinPeriode={1}&nbJours={2}"

    # Date format injected in the request URLs.
    INPUT_DATE_FORMAT = "%Y-%m-%d"

    # Date format used to parse the 'time_period' of the daily readings.
    OUTPUT_DATE_FORMAT = "%d/%m/%Y"

    def __init__(self, username: str, password: str):
        """Store the credentials through the parent constructor."""

        super().__init__(username, password)

    def _loadFromSession(self, auth_token: str, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Download the readings of a PCE and convert them to the requested frequencies.

        Args:
            auth_token: value sent as the ``auth_token`` cookie for monespace.grdf.fr.
            pceIdentifier: identifier of the meter to query.
            startDate: first day of the requested period.
            endDate: last day of the requested period.
            frequencies: frequencies to compute; every ``Frequency`` when None.

        Returns:
            A dictionary mapping each frequency value to its list of readings.

        Raises:
            Exception: when the consumption download still fails on the last
                of the 10 attempts (HTML answer or non-200 status code).
        """

        computeByFrequency = {
            Frequency.HOURLY: FrequencyConverter.computeHourly,
            Frequency.DAILY: FrequencyConverter.computeDaily,
            Frequency.WEEKLY: FrequencyConverter.computeWeekly,
            Frequency.MONTHLY: FrequencyConverter.computeMonthly,
            Frequency.YEARLY: FrequencyConverter.computeYearly
        }

        # Data URL: Inject parameters.
        downloadUrl = JsonWebDataSource.DATA_URL.format(startDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), pceIdentifier)

        # Retry mechanism: the loop is left either by 'break' (success) or by
        # re-raising the error of the last attempt.
        retry = 10
        while retry > 0:

            # Create a fresh session for each attempt.
            session = Session()
            session.headers.update({
                "Host": "monespace.grdf.fr",
                "Domain": "grdf.fr",
                "X-Requested-With": "XMLHttpRequest",
                "Accept": "application/json"
            })
            session.cookies.set("auth_token", auth_token, domain="monespace.grdf.fr")

            try:
                response = session.get(downloadUrl)

                # A missing Content-Type header must not crash the check, hence the "" default.
                if "text/html" in response.headers.get("Content-Type", ""):
                    raise Exception("An error occurred while loading data. Please check your credentials.")

                if response.status_code != 200:
                    raise Exception(f"An error occurred while loading data. Status code: {response.status_code} - {response.text}")

                break
            except Exception:

                # The failed session is never reused: release its connections.
                session.close()

                if retry == 1:
                    raise

                Logger.error("An error occurred while loading data. Retry in 3 seconds.")
                time.sleep(3)
                retry -= 1

        try:
            data = response.text

            # Temperatures URL: Inject parameters.
            # The end date is moved back to yesterday when it is today or later,
            # and the number of days requested is capped at 730.
            endDate = date.today() - timedelta(days=1) if endDate >= date.today() else endDate
            days = min((endDate - startDate).days, 730)
            temperaturesUrl = JsonWebDataSource.TEMPERATURES_URL.format(pceIdentifier, endDate.strftime(JsonWebDataSource.INPUT_DATE_FORMAT), days)

            # Get weather data with the session that succeeded above.
            temperatures = session.get(temperaturesUrl).text
        finally:
            session.close()

        # Transform all the data into the target structure.
        daily = JsonParser.parse(data, temperatures, pceIdentifier)

        if frequencies is None:
            # Transform Enum in List.
            frequencyList = list(Frequency)
        else:
            # Get unique values.
            frequencyList = set(frequencies)

        return {frequency.value: computeByFrequency[frequency](daily) for frequency in frequencyList}
337
+
338
+
339
+ # ------------------------------------------------------------------------------------------------------------
340
class JsonFileDataSource(IDataSource):
    """Data source reading the consumption and temperature data from two local JSON files."""

    def __init__(self, consumptionJsonFile: str, temperatureJsonFile):
        """Remember the paths of the consumption and temperature JSON files."""

        self.__consumptionJsonFile = consumptionJsonFile
        self.__temperatureJsonFile = temperatureJsonFile

    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Parse both files and return the readings for each requested frequency.

        ``startDate`` and ``endDate`` are accepted for interface compatibility
        but are not used by this implementation. When ``frequencies`` is None,
        every member of ``Frequency`` is computed.
        """

        # Both files are opened (consumption first) and closed together.
        with open(self.__consumptionJsonFile) as consumptionStream, open(self.__temperatureJsonFile) as temperatureStream:
            daily = JsonParser.parse(consumptionStream.read(), temperatureStream.read(), pceIdentifier)

        # Converter to apply to the daily readings for each frequency.
        converters = {
            Frequency.HOURLY: FrequencyConverter.computeHourly,
            Frequency.DAILY: FrequencyConverter.computeDaily,
            Frequency.WEEKLY: FrequencyConverter.computeWeekly,
            Frequency.MONTHLY: FrequencyConverter.computeMonthly,
            Frequency.YEARLY: FrequencyConverter.computeYearly
        }

        # All the frequencies by default, otherwise the unique requested ones.
        selected = list(Frequency) if frequencies is None else set(frequencies)

        return {item.value: converters[item](daily) for item in selected}
374
+
375
+
376
+ # ------------------------------------------------------------------------------------------------------------
377
class TestDataSource(IDataSource):
    """Data source returning the sample readings stored in the 'resources' folder next to this module."""

    def __init__(self):
        """Nothing to initialize."""

        pass

    def load(self, pceIdentifier: str, startDate: date, endDate: date, frequencies: Optional[List[Frequency]] = None) -> MeterReadingsByFrequency:
        """Return the content of the sample JSON file of each requested frequency.

        ``pceIdentifier``, ``startDate`` and ``endDate`` are not used here.
        When ``frequencies`` is None, every member of ``Frequency`` is loaded.
        """

        # Sample file name for each frequency.
        dataSampleFilenameByFrequency = {
            Frequency.HOURLY: "hourly_data_sample.json",
            Frequency.DAILY: "daily_data_sample.json",
            Frequency.WEEKLY: "weekly_data_sample.json",
            Frequency.MONTHLY: "monthly_data_sample.json",
            Frequency.YEARLY: "yearly_data_sample.json"
        }

        # All the frequencies by default, otherwise the unique requested ones.
        selected = list(Frequency) if frequencies is None else set(frequencies)

        samples = {}
        for frequency in selected:
            # The sample files live in the 'resources' folder beside this module.
            samplePath = f"{os.path.dirname(os.path.abspath(__file__))}/resources/{dataSampleFilenameByFrequency[frequency]}"

            with open(samplePath) as sampleStream:
                content = json.load(sampleStream)

            samples[frequency.value] = cast(List[Dict[PropertyName, Any]], content)

        return samples
409
+
410
+
411
+ # ------------------------------------------------------------------------------------------------------------
412
class FrequencyConverter:
    """Aggregate a list of daily readings into weekly, monthly or yearly readings.

    Each daily reading is a dictionary from which the following keys are read:
    'time_period' (a date formatted with JsonWebDataSource.OUTPUT_DATE_FORMAT),
    'start_index_m3', 'end_index_m3', 'volume_m3', 'energy_kwh' and 'timestamp'.
    Every aggregated reading exposes the same keys.
    """

    # French month names, indexed by (month number - 1).
    MONTHS = [
        "Janvier",
        "Février",
        "Mars",
        "Avril",
        "Mai",
        "Juin",
        "Juillet",
        "Août",
        "Septembre",
        "Octobre",
        "Novembre",
        "Décembre"
    ]

    # Columns kept in every aggregated reading.
    _TARGET_COLUMNS = ["time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]

    # ------------------------------------------------------
    @staticmethod
    def _toDataFrame(daily: List[Dict[str, Any]]) -> pd.DataFrame:
        """Build a DataFrame from the daily readings and add a parsed 'date_time' column."""

        df = pd.DataFrame(daily)

        # Trimming head and trailing spaces and convert to datetime.
        df["date_time"] = pd.to_datetime(df["time_period"].str.strip(), format=JsonWebDataSource.OUTPUT_DATE_FORMAT)

        return df

    # ------------------------------------------------------
    @staticmethod
    def _keepCompletePeriods(df: pd.DataFrame, minCount: int) -> pd.DataFrame:
        """Keep the rows having at least minCount daily readings, plus the last row whatever its count.

        The rows must already be sorted chronologically: the last one is the
        period still in progress, which is reported even when incomplete.
        """

        keep = df["count"] >= minCount

        # A single mask aligned on df avoids indexing a one-row frame with a
        # full-length boolean Series.
        if not keep.empty:
            keep.iloc[-1] = True

        return df[keep]

    # ------------------------------------------------------
    @staticmethod
    def computeHourly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return an empty list: no hourly reading is derived from the daily ones."""

        return []

    # ------------------------------------------------------
    @staticmethod
    def computeDaily(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return the daily readings unchanged (same list object)."""

        return daily

    # ------------------------------------------------------
    @staticmethod
    def computeWeekly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by week (Monday to Sunday).

        Weeks with fewer than 7 daily readings are dropped, except the last one.
        The 'time_period' of a week is "Du <first day> au <last day>".
        Returns an empty list when there is no daily reading.
        """

        # Nothing to aggregate: the DataFrame would have no 'time_period' column.
        if not daily:
            return []

        df = FrequencyConverter._toDataFrame(daily)

        # Get the first day of week: weekday 1 (Monday) of the %W week number.
        df["first_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 1"), format="%W %Y %w")

        # Get the last day of week: weekday 0 (Sunday) of the %W week number.
        df["last_day_of_week"] = pd.to_datetime(df["date_time"].dt.strftime("%W %Y 0"), format="%W %Y %w")

        # Reformat the time period.
        df["time_period"] = "Du " + df["first_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str) + " au " + df["last_day_of_week"].dt.strftime(JsonWebDataSource.OUTPUT_DATE_FORMAT).astype(str)

        # Aggregate rows by week.
        df = df[["first_day_of_week", "time_period", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("time_period").agg(
            first_day_of_week=('first_day_of_week', 'min'),
            start_index_m3=('start_index_m3', 'min'),
            end_index_m3=('end_index_m3', 'max'),
            volume_m3=('volume_m3', 'sum'),
            energy_kwh=('energy_kwh', 'sum'),
            timestamp=('timestamp', 'min'),
            count=('energy_kwh', 'count')
        ).reset_index()

        # Sort rows by week ascending.
        df = df.sort_values(by=['first_day_of_week'])

        # Select rows where we have a full week (7 days) except for the current week.
        df = FrequencyConverter._keepCompletePeriods(df, 7)

        # Select target columns.
        df = df[FrequencyConverter._TARGET_COLUMNS]

        return cast(List[Dict[str, Any]], df.to_dict('records'))

    # ------------------------------------------------------
    @staticmethod
    def computeMonthly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by month.

        Months with fewer than 28 daily readings are dropped, except the last one.
        The 'time_period' of a month is "<French month name> <year>".
        Returns an empty list when there is no daily reading.
        """

        # Nothing to aggregate: the DataFrame would have no 'time_period' column.
        if not daily:
            return []

        df = FrequencyConverter._toDataFrame(daily)

        # Get the corresponding month-year.
        df["month_year"] = df["date_time"].apply(lambda x: FrequencyConverter.MONTHS[x.month - 1]).astype(str) + " " + df["date_time"].dt.strftime("%Y").astype(str)

        # Aggregate rows by month_year.
        df = df[["date_time", "month_year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("month_year").agg(
            first_day_of_month=('date_time', 'min'),
            start_index_m3=('start_index_m3', 'min'),
            end_index_m3=('end_index_m3', 'max'),
            volume_m3=('volume_m3', 'sum'),
            energy_kwh=('energy_kwh', 'sum'),
            timestamp=('timestamp', 'min'),
            count=('energy_kwh', 'count')
        ).reset_index()

        # Sort rows by month ascending.
        df = df.sort_values(by=['first_day_of_month'])

        # Select rows where we have a full month (more than 27 days) except for the current month.
        df = FrequencyConverter._keepCompletePeriods(df, 28)

        # Rename columns for their target names.
        df = df.rename(columns={"month_year": "time_period"})

        # Select target columns.
        df = df[FrequencyConverter._TARGET_COLUMNS]

        return cast(List[Dict[str, Any]], df.to_dict('records'))

    # ------------------------------------------------------
    @staticmethod
    def computeYearly(daily: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Aggregate the daily readings by year.

        Years with fewer than 360 daily readings are dropped, except the last one.
        The 'time_period' of a year is the 4-digit year as a string.
        Returns an empty list when there is no daily reading.
        """

        # Nothing to aggregate: the DataFrame would have no 'time_period' column.
        if not daily:
            return []

        df = FrequencyConverter._toDataFrame(daily)

        # Get the corresponding year.
        df["year"] = df["date_time"].dt.strftime("%Y")

        # Aggregate rows by year.
        df = df[["year", "start_index_m3", "end_index_m3", "volume_m3", "energy_kwh", "timestamp"]].groupby("year").agg(
            start_index_m3=('start_index_m3', 'min'),
            end_index_m3=('end_index_m3', 'max'),
            volume_m3=('volume_m3', 'sum'),
            energy_kwh=('energy_kwh', 'sum'),
            timestamp=('timestamp', 'min'),
            count=('energy_kwh', 'count')
        ).reset_index()

        # Sort rows by year ascending.
        df = df.sort_values(by=['year'])

        # Select rows where we have almost a full year (more than 360) except for the current year.
        df = FrequencyConverter._keepCompletePeriods(df, 360)

        # Rename columns for their target names.
        df = df.rename(columns={"year": "time_period"})

        # Select target columns.
        df = df[FrequencyConverter._TARGET_COLUMNS]

        return cast(List[Dict[str, Any]], df.to_dict('records'))