SankeyExcelParser 1.0.0b0__py3-none-any.whl

Files changed (32)
  1. SankeyExcelParser/__init__.py +0 -0
  2. SankeyExcelParser/io_excel.py +1867 -0
  3. SankeyExcelParser/io_excel_constants.py +811 -0
  4. SankeyExcelParser/sankey.py +3138 -0
  5. SankeyExcelParser/sankey_utils/__init__.py +0 -0
  6. SankeyExcelParser/sankey_utils/data.py +1118 -0
  7. SankeyExcelParser/sankey_utils/excel_source.py +31 -0
  8. SankeyExcelParser/sankey_utils/flux.py +344 -0
  9. SankeyExcelParser/sankey_utils/functions.py +278 -0
  10. SankeyExcelParser/sankey_utils/node.py +340 -0
  11. SankeyExcelParser/sankey_utils/protos/__init__.py +0 -0
  12. SankeyExcelParser/sankey_utils/protos/flux.py +84 -0
  13. SankeyExcelParser/sankey_utils/protos/node.py +386 -0
  14. SankeyExcelParser/sankey_utils/protos/sankey_object.py +135 -0
  15. SankeyExcelParser/sankey_utils/protos/tag_group.py +95 -0
  16. SankeyExcelParser/sankey_utils/sankey_object.py +165 -0
  17. SankeyExcelParser/sankey_utils/table_object.py +37 -0
  18. SankeyExcelParser/sankey_utils/tag.py +95 -0
  19. SankeyExcelParser/sankey_utils/tag_group.py +206 -0
  20. SankeyExcelParser/su_trace.py +239 -0
  21. SankeyExcelParser/tests/integration/__init__.py +0 -0
  22. SankeyExcelParser/tests/integration/test_base.py +356 -0
  23. SankeyExcelParser/tests/integration/test_run_check_input.py +100 -0
  24. SankeyExcelParser/tests/integration/test_run_conversions.py +96 -0
  25. SankeyExcelParser/tests/integration/test_run_load_input.py +94 -0
  26. SankeyExcelParser/tests/unit/__init__.py +0 -0
  27. SankeyExcelParser-1.0.0b0.data/scripts/run_parse_and_write_excel.py +155 -0
  28. SankeyExcelParser-1.0.0b0.data/scripts/run_parse_excel.py +115 -0
  29. SankeyExcelParser-1.0.0b0.dist-info/METADATA +113 -0
  30. SankeyExcelParser-1.0.0b0.dist-info/RECORD +32 -0
  31. SankeyExcelParser-1.0.0b0.dist-info/WHEEL +5 -0
  32. SankeyExcelParser-1.0.0b0.dist-info/top_level.txt +1 -0
SankeyExcelParser/io_excel.py
@@ -0,0 +1,1867 @@
"""
This module is dedicated to the conversion from external formats to the internal
json format. External formats may be: a workbook (excel), another json file, a
database, etc. Structure and specifications of the internal json format are
defined in this module. The internal json format can take two main forms: one to
address input information and a second one for output communications.
"""

# External libs -----------------------------------------------------
import pandas as pd
import numpy as np
import re

# Local libs -------------------------------------------------------
import SankeyExcelParser.io_excel_constants as CONST
import SankeyExcelParser.su_trace as su_trace

# External modules -------------------------------------------------
from unidecode import unidecode

# Local modules -----------------------------------------------------
from SankeyExcelParser.sankey import Sankey, UserExcelConverter

# has_xl_wings = True
# try:
#     # import xlwings as xl
#     import pythoncom
#     pythoncom.CoInitialize()
# except Exception:
#     has_xl_wings = False


# Private functions ----------------------------------------------------------------
def _compareStrings(
    string_in: str,
    string_ref: str,
    strip_input_string=False
):
    """
    Normalize strings (case, accents) for easier comparison.

    Parameters
    ----------
    :param string_in: String to compare.
    :type string_in: str

    :param string_ref: Reference string (regex allowed) to compare with.
    :type string_ref: str

    :param strip_input_string: Remove spaces at start / end of the input string.
    :type strip_input_string: boolean, optional (default=False)

    Returns
    -------
    :return: True if the strings mean the same thing, False otherwise
    :rtype: bool
    """
    s1 = string_in.lower()
    s2 = string_ref.lower()
    if strip_input_string:
        s1 = s1.strip()
    return (re.fullmatch(unidecode(s2), unidecode(s1)) is not None)
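

# Illustrative usage sketch (not part of the original wheel): _compareStrings
# ignores case and accents, and the reference string acts as a regex pattern.
def _demo_compare_strings():
    assert _compareStrings('Équivalence', 'equivalence')
    assert _compareStrings('  flux data', 'flux data', strip_input_string=True)
    assert not _compareStrings('flux', 'flux data')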


def _consistantColName(
    sheet_name: str,
    usr_col_name: str,
    xl_names_converter: UserExcelConverter,
    tags: list = []
):
    '''
    Test if usr_col_name is consistent with the allowed columns list.

    Parameters
    ----------
    :param sheet_name: Sheet name to check.
    :type sheet_name: string

    :param usr_col_name: Column to find.
    :type usr_col_name: string

    :param xl_names_converter: Keeps track of the mapping between user and standard names.
    :type xl_names_converter: UserExcelConverter

    :param tags: Tags list to check.
    :type tags: list

    Returns
    -------
    :return:
        - out1: True if the tested column is consistent, False otherwise.
        - out2: If the column corresponds to an entry in the sheet name dictionary, the corresponding key.
          If the column is a tag column / an additional column, the standard format of the column name.
          Otherwise, the user column name unchanged.
    :rtype: (bool, string)
    '''
    # Check if Sheet is about data
    if _compareStrings(sheet_name, 'flux data', strip_input_string=True):
        xl_names_converter.add_new_col(sheet_name, CONST.DATA_SHEET, usr_col_name)
        return True, CONST.DATA_SHEET
    sheet_name_lower = sheet_name.lower()
    usr_col_name_lower = usr_col_name.lower()
    if sheet_name_lower != '' and usr_col_name_lower != '':
        # Is the proposed column a tag column ?
        for tag in tags:
            if _compareStrings(usr_col_name_lower, tag, strip_input_string=True):
                return True, tag
        # Is the proposed column in allowed columns ?
        for std_col_name in CONST.DICT_OF_COLS_NAMES__RE[sheet_name_lower].keys():
            for allowed_col_re in CONST.DICT_OF_COLS_NAMES__RE[sheet_name_lower][std_col_name]:
                if _compareStrings(usr_col_name_lower, allowed_col_re, strip_input_string=True):
                    xl_names_converter.add_new_col(sheet_name_lower, std_col_name, usr_col_name)
                    return True, std_col_name
    return False, usr_col_name


def _consistantSheetName(
    usr_sheet_name: str,
    xl_names_converter: UserExcelConverter,
):
    '''
    Test if usr_sheet_name is consistent with the allowed sheets list.

    Parameters
    ----------
    :param usr_sheet_name: Sheet name to check.
    :type usr_sheet_name: string

    Returns
    -------
    :return:
        - out1: True if the tested sheet is consistent.
        - out2: The dictionary key corresponding to the allowed list found, if the tested sheet is consistent.
          The list of allowed sheet names if not.
    :rtype: (bool, string)

    Notes
    -----
    - If the usr_sheet_name input is empty ('') the result is the list of
      allowed sheet names as a string.
    - A particular case is taken into account for proxy input files which
      usually have 3 proxy sheets (one of them with the 'sector' keyword in its name).
    '''
    # Check if Sheet is about data
    if _compareStrings(usr_sheet_name, 'flux data', strip_input_string=True):
        xl_names_converter.add_new_sheet(CONST.DATA_SHEET, usr_sheet_name)
        return True, CONST.DATA_SHEET
    # If we have a sheet to check
    if usr_sheet_name != '':
        # Is sheet in list of possible names for sheets
        for std_sheet_name in CONST.DICT_OF_SHEET_NAMES__RE.keys():
            for allow_sheet_re in CONST.DICT_OF_SHEET_NAMES__RE[std_sheet_name]:
                if _compareStrings(usr_sheet_name, allow_sheet_re, strip_input_string=True):
                    xl_names_converter.add_new_sheet(std_sheet_name, usr_sheet_name)
                    return True, std_sheet_name
    # We didn't find the corresponding key
    return False, _allowedSheetNames()


def _allowedSheetNames(sheets_to_show=[]):
    '''
    Return the table of allowed sheet names with respect to their type of information.

    Parameters
    ----------
    :param sheets_to_show: List of sheets to print. If the list is empty, print all.
    :type sheets_to_show: list, optional, default=[]

    Returns
    -------
    :return: Table of allowed sheet names, formatted as a printable string.
    :rtype: string
    '''
    wcol1 = 30
    wcol2 = 70
    # Create table header
    list_allowed = '{0: <{w1}} | {1: <{w2}}\n'.format("Sheet type", "Possible sheet names", w1=wcol1, w2=wcol2)
    list_allowed += '-'*(wcol1 + wcol2 + 3) + '\n'
    # Keys to show = table first column
    if len(sheets_to_show) > 0:
        list_dict_keys = [_ for _ in sheets_to_show if _ in CONST.DICT_OF_SHEET_NAMES.keys()]
    else:
        list_dict_keys = CONST.DICT_OF_SHEET_NAMES.keys()
    # Create table
    for dict_key in list_dict_keys:
        list_allowed += '{: <{w}} | '.format(dict_key, w=wcol1)
        if len(CONST.DICT_OF_SHEET_NAMES[dict_key]) != 0:
            list_allowed += ', '.join(set(CONST.DICT_OF_SHEET_NAMES[dict_key]))
        list_allowed += '\n'
    return list_allowed
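

# Illustrative usage sketch (not part of the original wheel): print the mapping
# between standard sheet types and the sheet names accepted in a user workbook.
def _demo_allowed_sheet_names():
    print(_allowedSheetNames())
    # Restrict to a subset of sheet types (keys come from io_excel_constants)
    print(_allowedSheetNames([CONST.DATA_SHEET, CONST.NODES_SHEET]))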


def _checkNeededColumns(
    columns: list,
    columns_needed: list,
    sheet_name: str,
    columns_needed_onlyone: list = []
):
    """
    Check that all needed columns are present in the given columns list.

    Parameters
    ----------
    :param columns: Current list of columns.
    :type columns: list

    :param columns_needed: List of columns that must all be present.
    :type columns_needed: list

    :param sheet_name: Sheet name from which to check names.
    :type sheet_name: str

    :param columns_needed_onlyone: List of columns from which at least one must be present.
    :type columns_needed_onlyone: list

    Returns
    -------
    :return: (Success?, Log message)
    :rtype: (bool, str)
    """
    # Check needed columns
    for column_needed in columns_needed:
        if not (column_needed in columns):
            err_msg = 'The \"{}\" column is missing '.format(column_needed)
            err_msg += 'or does not have the correct name.\n'
            err_msg += '\n'
            err_msg += '{}\n'.format(CONST.DICT_OF_COMMENTS[sheet_name][column_needed][0])
            err_msg += '\n'
            std_column_names = set(CONST.DICT_OF_COLS_NAMES[sheet_name][column_needed])
            err_msg += 'Acceptable names for this column : {}'.format(
                ', '.join(['\"{}\"'.format(_) for _ in std_column_names]))
            return False, err_msg
    # Check optional columns (need only one)
    if len(columns_needed_onlyone) > 0:
        if (not any(np.in1d(columns_needed_onlyone, columns))):
            err_msg = 'A mandatory column is missing or does not have the correct name.\n'
            err_msg += 'At least one of these columns must be present : {}'.format(
                ', '.join(['\"{}\"'.format(_) for _ in columns_needed_onlyone]))
            return False, err_msg
    return True, ''
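

# Illustrative usage sketch (not part of the original wheel; the column names
# below are arbitrary examples, not the CONST ones).
def _demo_check_needed_columns():
    ok, msg = _checkNeededColumns(
        ['origin', 'destination', 'value'],  # columns found in the sheet
        ['origin', 'destination'],           # columns that must all be present
        CONST.DATA_SHEET)
    assert ok and msg == ''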


def _castColumnType(
    sheet: pd.DataFrame,
    columns_types,
    empty_to_default_value=False
):
    """
    Cast specific columns to given types or default values.

    Parameters
    ----------
    :param sheet: Sheet to modify.
    :type sheet: pandas.DataFrame, modified

    :param columns_types: Dict of columns with their default types/values, OR a single default value for all columns.
    :type columns_types: any

    :param empty_to_default_value: If True, fill empty cells with the default value; otherwise set them to None.
    :type: bool

    Returns
    -------
    :return: (Success? ; Log message)
    :rtype: (bool, str)
    """
    # Filter columns to convert / Columns that are in sheet
    if type(columns_types) is dict:
        cols_to_convert = \
            [(_, columns_types[_]) for _ in columns_types.keys() if _ in sheet.columns]
    else:
        cols_to_convert = \
            [(_, columns_types) for _ in sheet.columns]
    # Convert
    for (col, _) in cols_to_convert:
        try:
            # Special type
            if type(_) is dict:
                val = _['val']
            else:
                val = _
            # Convert as string
            if type(val) is str:
                sheet[col] = sheet[col].replace({np.nan: 'None'})
                sheet[col] = sheet[col].astype(str)
                if empty_to_default_value:
                    sheet[col] = sheet[col].replace({'None': val})
                else:
                    sheet[col] = sheet[col].replace({'None': None})
            # Convert as float
            elif type(val) is float:
                sheet[col] = sheet[col].astype(float)
                if empty_to_default_value:
                    sheet[col] = sheet[col].replace({np.nan: val})
                else:
                    sheet[col] = sheet[col].replace({np.nan: None})
            # Convert as int
            elif type(val) is int:
                # NaN can not be cast to int : use an unlikely sentinel value
                # to keep track of empty cells through the cast
                sheet[col] = sheet[col].replace({np.nan: -702313053})
                sheet[col] = sheet[col].astype(int)
                if empty_to_default_value:
                    sheet[col] = sheet[col].replace({-702313053: val})
                else:
                    sheet[col] = sheet[col].replace({-702313053: None})
            # Convert to other types
            else:
                sheet[col] = sheet[col].astype(type(val))
        except Exception:
            err = 'Column \"{}\" contains values '.format(col)
            err += 'that could not be read as {} values'.format(type(val))
            return False, err
    # Replace remaining empty data with None
    sheet.replace({np.nan: None}, inplace=True)
    return True, ''
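

# Illustrative usage sketch (not part of the original wheel): cast a small
# sheet in place, filling blanks with each column's default value.
def _demo_cast_column_type():
    df = pd.DataFrame({'name': ['a', np.nan], 'level': [1.0, np.nan]})
    ok, msg = _castColumnType(df, {'name': '', 'level': 0}, empty_to_default_value=True)
    assert ok
    assert df['level'].to_list() == [1, 0]  # ints, NaN replaced by the default 0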


def _pd_sorted_col(
    dft: pd.DataFrame,
    lico: list
):
    """
    Sort the columns of a dataframe according to a given column list.

    Parameters
    ----------
    :param dft: Input dataframe to sort.
    :type dft: pandas.DataFrame

    :param lico: Ordered list of columns to have.
    :type lico: list

    Returns
    -------
    :return: Sorted dataframe.
    :rtype: pandas.DataFrame
    """
    li_df = list(dft)
    if li_df != lico:
        dftm = pd.DataFrame(columns=lico)
        for col in lico:
            dftm[col] = dft[col]
        return dftm
    # Columns already in the right order
    return dft
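

# Illustrative usage sketch (not part of the original wheel): reorder columns.
def _demo_pd_sorted_col():
    df = pd.DataFrame({'b': [1], 'a': [2]})
    out = _pd_sorted_col(df, ['a', 'b'])
    assert list(out.columns) == ['a', 'b']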


def _extractTablesFromSheet(
    sheet: pd.DataFrame,
    new_sheets: list,
    default_columns_names=None
):
    """
    Extract all tables from an excel sheet.

    Ex: Extract tables from a sheet like this

    +----+----+----+----+----+
    | -  | -  | -  | -  | -  |
    +----+----+----+----+----+
    | -  | -  | C1 | C2 | C3 |
    +----+----+----+----+----+
    | -  | R1 | x  | -  | x  |
    +----+----+----+----+----+
    | -  | R2 | x  | x  | -  |
    +----+----+----+----+----+
    | -  | -  | -  | -  | -  |
    +----+----+----+----+----+
    | -  | -  | C4 | C5 | C6 |
    +----+----+----+----+----+
    | -  | R3 | -  | x  | x  |
    +----+----+----+----+----+
    | -  | R4 | x  | -  | -  |
    +----+----+----+----+----+

    Or like this

    +----+----+----+----+----+
    | -  | -  | -  | -  | -  |
    +----+----+----+----+----+
    | -  | -  | C1 | C2 | C3 |
    +----+----+----+----+----+
    | -  | R1 | x  | -  | x  |
    +----+----+----+----+----+
    | -  | R2 | x  | x  | -  |
    +----+----+----+----+----+
    | -  | -  | -  | -  | -  |
    +----+----+----+----+----+
    | -  | R3 | -  | x  | x  |
    +----+----+----+----+----+
    | -  | R4 | x  | -  | -  |
    +----+----+----+----+----+

    Parameters
    ----------
    :param sheet: Sheet to parse.
    :type sheet: pd.DataFrame

    :param new_sheets: List of tables extracted from the sheet.
    :type new_sheets: list(pd.DataFrame), modified

    :param default_columns_names: Column names to use when the table has none (case 1 below).
    :type default_columns_names: list, optional (default=None)

    Returns
    -------
    :return: True if the extraction succeeded, False otherwise.
    :rtype: bool
    """
    # Nothing to do
    if sheet.empty:
        return True
    # If we don't have any default column names -> read column index
    # -> Useful if first row is composed of node names
    # -> Need to get rid of 'Unnamed' cols
    # -> Then, if nodes are mentioned in more than one column, pandas adds a '.x'
    #    (x a number) at the end of the node name, so we need to get rid of that too...
    if default_columns_names is None:
        default_columns_names = []
        for _ in sheet.columns:
            if isinstance(_, str):
                if (re.fullmatch('Unnamed:.*', _) is None):
                    end_ = re.search('([.][0-9]+)\Z', _)  # noqa: W605
                    if end_ is not None:
                        default_columns_names.append(_[:-len(end_[0])])
                    else:
                        default_columns_names.append(_)
    # Need to reindex sheet so index and columns are correctly enumerated
    sheet = sheet.reset_index(drop=True)
    sheet = sheet.T.reset_index(drop=True).T
    # ----------------- Initialize starting and ending points
    start_row = 0
    start_col = 0
    index_col = 0  # Column number for index names
    end_row = sheet.shape[0]
    end_col = sheet.shape[1]
    # ---------------- Find starting point
    found_starting_point = False
    for row in range(sheet.shape[0]):
        for col in range(sheet.shape[1]):
            # Check if current val is NaN (empty cell)
            val = sheet.iat[row, col]
            is_nan = (val != val)
            # If not -> Bingo
            found_starting_point = (not is_nan)
            if found_starting_point:
                start_row = row
                start_col = col
                index_col = col
                break
        if found_starting_point:
            break
    # ------------ Check table format with upper left corner
    upper_left_corner = sheet.iloc[start_row:min(start_row+2, end_row), start_col:min(start_col+2, end_col)]
    # Not enough data in given sheet -> stop ?
    if (upper_left_corner.shape[0] < 2):
        # Modify starting row to avoid missing a table with only one line
        start_row = max(0, start_row-1)
        upper_left_corner = sheet.iloc[start_row:min(start_row+2, end_row), start_col:min(start_col+2, end_col)]
    if (upper_left_corner.shape[1] < 2):
        # Modify starting col to avoid missing a table with only one col
        start_col = max(0, start_col-1)
        index_col = start_col
        upper_left_corner = sheet.iloc[start_row:min(start_row+2, end_row), start_col:min(start_col+2, end_col)]
    if (upper_left_corner.shape[0] < 2) or (upper_left_corner.shape[1] < 2):
        # Ok, table does not contain any data
        return True
    # Upper left corner is an isolated value ?
    v1 = upper_left_corner.iloc[0, 1]
    v2 = upper_left_corner.iloc[1, 0]
    if (v1 != v1) and (v2 != v2):
        # Retry but without the isolated value
        sheet_copy = sheet.copy()  # copy to be sure that we don't modify the original sheet
        sheet_copy.iloc[start_row, start_col] = np.nan
        return _extractTablesFromSheet(sheet_copy, new_sheets, default_columns_names=default_columns_names)
    # First row is an overhead ?
    if (not _isValueAcceptedInMatrixTable(upper_left_corner.iloc[1, 1])):
        # Retry but without the overhead row
        sheet_copy = sheet.copy()  # copy to be sure that we don't modify the original sheet
        sheet_copy.iloc[start_row, start_col:end_col] = np.nan
        return _extractTablesFromSheet(sheet_copy, new_sheets, default_columns_names=default_columns_names)
    # Check if the content of first row = column names
    columns_names = None
    # Check what the upper left corner of the table contains
    # In all cases : 'val' can be 'x', 'X' or some stringified float value.
    # Case 1 : upper left corner = ['R1', 'val' / NaN]
    #                              ['R2', 'val' / NaN]
    #          -> 'val' and NaN can be turned into floats.
    # Case 2 : upper left corner = ['C1',        'C2'       ]
    #                              ['val' / NaN, 'val' / NaN]
    #          -> On first row, column names can not be turned into floats
    #          -> On first col, 'val' and NaN can be turned into floats
    # Case 3 : upper left corner = ['table name', 'C1'       ]
    #                              ['R1',         'val' / NaN]
    #          -> On first row, neither table name nor column names can be turned into floats
    #          -> On first col, no row name can be turned into floats
    if _isValueAcceptedInMatrixTable(upper_left_corner.iloc[0, 1]):
        case = 1
    else:
        if _isValueAcceptedInMatrixTable(upper_left_corner.iloc[1, 0]):
            case = 2
        else:
            case = 3
    # Check in which case we are
    if (case == 1):
        # Case 1 -> need to use default columns names
        columns_names = default_columns_names
        # Start col is one col on the right, because first col is index names
        start_col = min(start_col+1, end_col)
        # Ending col is easy to find
        end_col = min(start_col + len(columns_names), end_col)
    if (case == 2):
        # Case 2 -> There are column names on the first row
        columns_names = sheet.astype('str').iloc[start_row, start_col:].to_list()
        # start row is one row below & index col is one col before
        start_row = min(start_row+1, end_row)
        index_col = max(0, index_col-1)
    if (case == 3):
        # Case 3 -> There are column names on the first row, but starting one col on the right
        columns_names = sheet.astype('str').iloc[start_row, (start_col+1):].to_list()
        # start row is one row below & index col does not change, and start col is one col on the right
        start_row = min(start_row+1, end_row)
        start_col = min(start_col+1, end_col)
    if (case == 2) or (case == 3):
        # Case 2 & 3 : Find ending col
        for [i, col_name] in enumerate(columns_names):
            # Check if current col name is NaN (empty cell)
            is_nan = (col_name != col_name)
            # If nan -> Bingo
            if is_nan:
                end_col = min(start_col + i, end_col)
                columns_names = columns_names[:i]
                break
    # No default column name was provided -> Error
    if columns_names is None:
        return False
    # ------------ Check what first col contains
    index_names = sheet.iloc[start_row:end_row, index_col].to_list()
    # ------------- Find ending row
    for (i, index_name) in enumerate(index_names):
        # Check if current val is NaN (empty cell)
        is_nan = (index_name != index_name)
        # If nan -> Bingo
        if is_nan:
            end_row = min(i + start_row, end_row)
            index_names = index_names[:i]
            break
    # New table
    new_table = sheet.iloc[start_row:end_row, start_col:end_col]
    if len(new_table.columns) != len(columns_names):
        su_trace.logger.error('Could not read TER table')
        return False
    new_table.columns = [_.strip() if (type(_) is str) else _ for _ in columns_names]
    new_table.index = [_.strip() if (type(_) is str) else _ for _ in index_names]
    new_sheets.append(new_table)
    # Find other tables if needed
    ok = True
    ok &= _extractTablesFromSheet(
        sheet.iloc[:, end_col:], new_sheets,
        default_columns_names=columns_names)  # Upper right missing part of sheet
    ok &= _extractTablesFromSheet(
        sheet.iloc[end_row:, :], new_sheets,
        default_columns_names=columns_names)  # Lower missing part of sheet
    # TODO: review how leftover table chunks are split in the recursion
    return ok
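

# Illustrative usage sketch (not part of the original wheel): extract the single
# table embedded in a padded sheet ('T' is the table name cell, case 3 above).
def _demo_extract_tables_from_sheet():
    raw = pd.DataFrame([
        [np.nan, np.nan, np.nan, np.nan],
        [np.nan, 'T', 'C1', 'C2'],
        [np.nan, 'R1', 'x', np.nan],
        [np.nan, 'R2', np.nan, 'x'],
    ])
    tables = []
    assert _extractTablesFromSheet(raw, tables)
    assert list(tables[0].columns) == ['C1', 'C2']
    assert list(tables[0].index) == ['R1', 'R2']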


def _isValueAcceptedInMatrixTable(value):
    """
    In a matrix table, accepted values are NaN, numbers and 'x' or 'X'.

    Parameters
    ----------
    :param value: Value to test.
    :type value: Any

    Returns
    -------
    :return: True if value is OK, False otherwise.
    :rtype: boolean
    """
    # First check if value is a number or NaN
    # by trying to convert it to float
    try:
        float(value)
        return True
    except (ValueError, TypeError):  # TypeError covers None-like values
        # If it fails, then it's not NaN or a number,
        # but it can still be either 'x' or 'X'
        OK_but_not_a_number = '[xX]'
        if (re.fullmatch(OK_but_not_a_number, str(value)) is not None):
            return True
    return False
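

# Illustrative usage sketch (not part of the original wheel).
def _demo_is_value_accepted_in_matrix_table():
    assert _isValueAcceptedInMatrixTable('x')
    assert _isValueAcceptedInMatrixTable(3.14)
    assert _isValueAcceptedInMatrixTable(float('nan'))
    assert not _isValueAcceptedInMatrixTable('foo')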


def _hasDuplicatedEntry(entries: list):
    """
    Check if a list contains duplicated entries.

    Parameters
    ----------
    :param entries: List of entries to check.
    :type entries: list

    Returns
    -------
    :return: (True if duplicates were found ; dict of {duplicated entry: list of its positions})
    :rtype: (bool, dict)
    """
    duplicates = {}
    for (i, entry) in enumerate(entries):
        if entries.count(entry) > 1:
            if entry not in duplicates.keys():
                duplicates[entry] = []
            duplicates[entry].append(i)
    return (len(duplicates) > 0), duplicates
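

# Illustrative usage sketch (not part of the original wheel).
def _demo_has_duplicated_entry():
    has_dup, dups = _hasDuplicatedEntry(['a', 'b', 'a', 'c', 'a'])
    assert has_dup and dups == {'a': [0, 2, 4]}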


def _fuseDuplicatedColumns(table: pd.DataFrame, dup_cols: dict):
    # Get current columns names
    new_columns_names = table.columns.to_list()
    new_tables = {}
    # For each duplicated column, get the column name and the positions of its duplicates
    for (col_name, cols_index) in dup_cols.items():
        # Fuse columns
        new_tables[col_name] = table.loc[:, col_name].apply(lambda row: row.values[0], axis=1)
        # Rename duplicated columns, except the first one
        for (i, col_index) in enumerate(cols_index):
            if i == 0:
                continue
            new_columns_names[col_index] = col_name+'_dup'
    # Set new columns names
    table.columns = new_columns_names
    # Drop and replace
    for (col_name, sub_table) in new_tables.items():
        # Drop the renamed columns (except the first one)
        table.drop(columns=(col_name+'_dup'), inplace=True)
        # Apply the fused data on the remaining column
        table[col_name] = sub_table
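

# Illustrative usage sketch (not part of the original wheel): keep the first
# value of each duplicated column, then drop the extra columns.
def _demo_fuse_duplicated_columns():
    df = pd.DataFrame([[1, 2, 3]], columns=['a', 'b', 'a'])
    has_dup, dups = _hasDuplicatedEntry(df.columns.to_list())
    if has_dup:
        _fuseDuplicatedColumns(df, dups)
    assert list(df.columns) == ['a', 'b']
    assert df['a'].iloc[0] == 1  # first 'a' value was kept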


def _fuseDuplicatedRows(table: pd.DataFrame, dup_rows: dict):
    # Get current index names
    new_index_names = table.index.to_list()
    new_tables = {}
    # For each duplicated row, get the row name and the positions of its duplicates
    for (row_name, rows_index) in dup_rows.items():
        # Fuse rows
        new_tables[row_name] = table.loc[row_name, :].apply(lambda col: col.values[0], axis=0)
        # Rename duplicated rows, except the first one
        for (i, row_index) in enumerate(rows_index):
            if i == 0:
                continue
            new_index_names[row_index] = row_name+'_dup'
    # Set new index names
    table.index = new_index_names
    # Drop and replace
    for (row_name, sub_table) in new_tables.items():
        # Drop the renamed rows (except the first one)
        table.drop(index=(row_name+'_dup'), inplace=True)
        # Apply the fused data on the remaining row
        table.loc[row_name, :] = sub_table


# PUBLIC FUNCTIONS ----------------------------------------------------------------
def consistantSheetName(
    usr_sheet_name: str,
    sankey: Sankey
):
    '''
    Test if usr_sheet_name is consistent with the allowed sheets list.

    Parameters
    ----------
    usr_sheet_name : string
        Sheet name to check.

    Returns
    -------
    string
        The dictionary key corresponding to the allowed list found if the
        tested sheet is consistent; the list of allowed sheet names otherwise.

    Notes
    -----
    - If the usr_sheet_name input is empty ('') the result is the list of
      allowed sheet names as a string.
    - A particular case is taken into account for proxy input files which
      usually have 3 proxy sheets (one of them with the 'sector' keyword in its name).
    '''
    _, res = _consistantSheetName(usr_sheet_name, sankey.xl_user_converter)
    return res


def consistantColName(
    sheet_name: str,
    prop_col: str,
    sankey: Sankey,
    tags: list = []
):
    '''
    Test if prop_col is consistent with the allowed columns list.

    Parameters
    ----------
    :param sheet_name: Sheet name to check.
    :type sheet_name: string

    :param prop_col: Column to find.
    :type prop_col: string

    :param tags: Tags list to check.
    :type tags: list

    Returns
    -------
    :return:
        If the column corresponds to an entry in the sheet name dictionary, then the result is the corresponding key.
        If the column is a tag column / an additional column, the result is the standard format of the column name.
    :rtype: string
    '''
    _, res = _consistantColName(
        sheet_name,
        prop_col,
        sankey.xl_user_converter,
        tags)
    return res


def load_sankey_from_excel_file(
    input_file: str,
    sankey: Sankey,
    do_coherence_checks: bool = False,
    sheet_to_remove_names: list = None,
):
    '''
    Main converter routine. Calls the dedicated routine depending on input type.
    Uses the global variable 'su_trace' to trace the file processing.

    Parameters
    ----------
    :param input_file: Input file name to load (with extension and path).
    :type input_file: string

    :param sankey: Data struct as a Sankey object.
    :type sankey: Sankey, modified

    :param do_coherence_checks: Do we trigger coherence checks on the sankey structure ?
    :type do_coherence_checks: bool

    :param sheet_to_remove_names: List of sheets that will be rewritten or removed when re-exported as excel.
    :type sheet_to_remove_names: list, modified, optional (default=None)

    Returns
    -------
    :return: (Success ; Error message )
    :rtype: (bool; string)
    '''
    # Read excel input
    excel_file = pd.ExcelFile(input_file)
    # If everything went fine, get sheet names
    excel_sheet_names = excel_file.sheet_names
    # Keep only consistent sheets
    necessary_sheet_names = {}
    unconsistant_sheet_names = []
    use_sheet_to_remove_names = True
    if type(sheet_to_remove_names) is not list:
        use_sheet_to_remove_names = False
    for sheet_name in excel_sheet_names:
        # Get sheet reference name for given sheet name
        is_sheet_consistant, sheet_refkey = _consistantSheetName(sheet_name, sankey.xl_user_converter)
        if is_sheet_consistant:  # Got the reference name
            if sheet_refkey not in necessary_sheet_names:
                necessary_sheet_names[sheet_refkey] = [sheet_name]
            else:
                necessary_sheet_names[sheet_refkey].append(sheet_name)
        else:  # No reference name found
            unconsistant_sheet_names.append(sheet_name)
    # Check if we got some sheets to process
    if len(necessary_sheet_names.keys()) == 0:
        err_msg = "We didn't find any sheet name as specified in the following table : \n"
        err_msg += _allowedSheetNames()
        return False, err_msg
    # Debug log
    su_trace.logger.debug('Names of excel sheets that will be processed : ')
    [su_trace.logger.debug('- {}'.format(_)) for _ in necessary_sheet_names.values()]
    if len(unconsistant_sheet_names) > 0:
        su_trace.logger.debug('Names of excel sheets that will be ignored : ')
        [su_trace.logger.debug('- {}'.format(_)) for _ in unconsistant_sheet_names]
    if use_sheet_to_remove_names:
        if len(sheet_to_remove_names) > 0:
            su_trace.logger.debug('Names of excel sheets that will be removed : ')
            [su_trace.logger.debug('- {}'.format(_)) for _ in sheet_to_remove_names]
    # Update struct
    return _read_sankey_from_excel_book(
        input_file,
        necessary_sheet_names,
        sankey,
        do_coherence_checks=do_coherence_checks)
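

# Illustrative usage sketch (not part of the original wheel; assumes Sankey()
# can be default-constructed and that 'my_sankey.xlsx' exists):
#   sankey = Sankey()
#   ok, msg = load_sankey_from_excel_file('my_sankey.xlsx', sankey,
#                                         do_coherence_checks=True)
#   if not ok:
#       su_trace.logger.error(msg)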


def _read_sankey_from_excel_book(
    excel_file_name: str,
    sheet_names: dict,
    sankey: Sankey,
    do_coherence_checks: bool = False
):
    """
    Parse all sheets from an excel book to create a sankey struct.

    Parameters
    ----------
    :param excel_file_name: Name of the input excel file (with extension and path).
    :type excel_file_name: str

    :param sheet_names: Input file worksheet dict as {reference sheet name: user sheet names}.
    :type sheet_names: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    :param do_coherence_checks: Do we trigger coherence checks on the sankey structure ?
    :type do_coherence_checks: bool

    Returns
    -------
    :return: (Success ; Error message )
    :rtype: (bool; string)
    """
    # TODO : useless but I keep it for now
    mfa_dict = {}
    # Verify that we have the minimum number of sheets
    ok, msg = check_sheets_before_reading(sheet_names)
    if not ok:
        return ok, msg
    # First create standardized node type tags if needed
    for _ in (CONST.PRODUCTS_SHEET, CONST.SECTORS_SHEET, CONST.EXCHANGES_SHEET):
        if _ in sheet_names.keys():
            sankey.get_or_create_tagg(
                CONST.NODE_TYPE,
                CONST.TAG_TYPE_NODE,
                ':'.join([
                    CONST.NODE_TYPE_PRODUCT,
                    CONST.NODE_TYPE_SECTOR,
                    CONST.NODE_TYPE_EXCHANGE]))
            break
    # Then check all other TAGS
    if CONST.TAG_SHEET in sheet_names.keys():
        # Read tags
        for tag_sheet_name in sheet_names[CONST.TAG_SHEET]:
            su_trace.logger.info('Reading sheet {}'.format(tag_sheet_name))
            ok, msg = xl_read_tags_sheet(pd.read_excel(excel_file_name, tag_sheet_name), sankey)
            if not ok:
                return ok, "Error on sheet {0} ({1}) : {2}".format(tag_sheet_name, CONST.TAG_SHEET, msg)
            # Log warning messages
            if len(msg) > 0:
                su_trace.logger.error('Warning(s) on sheet {0} ({1}) :'.format(tag_sheet_name, CONST.TAG_SHEET))
                for _ in msg.split('\n'):
                    if len(_) > 0:
                        su_trace.logger.error(' - {}'.format(_))
    # Then check nodes, but in this order
    options = {}
    options['warn_on_new_nodes'] = False
    options['warn_on_new_flux'] = False
    prev_mfa_entry_name = []
    sheets_processing_order = [
        (CONST.NODES_SHEET, xl_read_nodes_sheet, [CONST.NODES_SHEET, options, sankey]),
        (CONST.PRODUCTS_SHEET, xl_read_products_sectors_sheet, [CONST.PRODUCTS_SHEET, options, sankey]),
        (CONST.SECTORS_SHEET, xl_read_products_sectors_sheet, [CONST.SECTORS_SHEET, options, sankey]),
        (CONST.EXCHANGES_SHEET, xl_read_products_sectors_sheet, [CONST.EXCHANGES_SHEET, options, sankey]),
        (CONST.IO_SHEET, xl_read_input_output_sheet, [options, mfa_dict, sankey]),
        (CONST.TER_SHEET, xl_read_terbase_sheet, [options, mfa_dict, sankey]),
        (CONST.DATA_SHEET, xl_read_data_sheet, [options, sankey]),
        (CONST.IO_DATA_SHEET, xl_read_input_output_data_sheet, [options, mfa_dict, sankey]),
        (CONST.MIN_MAX_SHEET, xl_read_min_max_sheet, [options, sankey]),
        (CONST.CONSTRAINTS_SHEET, xl_read_constraints_sheet, [options, sankey]),
        (CONST.RESULTS_SHEET, xl_read_result_sheet, [sankey]),
        # (CONST.ANALYSIS_SHEET, xl_read_analysis_sheet, [mfa_dict, sankey]),
        (CONST.UNCERTAINTY_SHEET, xl_read_uncertainty_sheet, [mfa_dict, sankey]),
        (CONST.CONVERSIONS_SHEET, xl_read_conversions_sheet, [mfa_dict, sankey])
    ]
    # Process all sheets in correct order if they exist
    for (std_sheet_name, extract_function, args) in sheets_processing_order:
        if std_sheet_name in sheet_names.keys():
            # Warn on new node creation
            if (not options['warn_on_new_nodes']) and (len(prev_mfa_entry_name) > 0):
                options['warn_on_new_nodes'] = \
                    (CONST.NODES_SHEET in prev_mfa_entry_name) or \
                    (CONST.IO_SHEET in prev_mfa_entry_name) or \
                    (CONST.TER_SHEET in prev_mfa_entry_name)
                options['warn_on_new_nodes'] |= \
                    (CONST.PRODUCTS_SHEET in prev_mfa_entry_name) and \
                    (CONST.SECTORS_SHEET in prev_mfa_entry_name) and \
                    (std_sheet_name != CONST.EXCHANGES_SHEET)
            # Warn on new flux creation
            if (not options['warn_on_new_flux']) and (len(prev_mfa_entry_name) > 0):
                options['warn_on_new_flux'] = \
                    (CONST.IO_SHEET in prev_mfa_entry_name) or \
                    (CONST.TER_SHEET in prev_mfa_entry_name) or \
                    (CONST.DATA_SHEET in prev_mfa_entry_name)
            # User sheet name
            for sheet_name in sheet_names[std_sheet_name]:
                # Extract sheet
                excel_sheet = pd.read_excel(excel_file_name, sheet_name)
                # If nothing inside -> continue
                nb_rows = excel_sheet.shape[0]
                if nb_rows < 1:
                    continue
                # Parse
                su_trace.logger.info('Reading sheet {}'.format(sheet_name))
                ok, msg = extract_function(excel_sheet, *args)
                if not ok:
                    return ok, "Error on sheet {0} ({1}) : {2}".format(sheet_name, std_sheet_name, msg)
                # Log warning messages
                if len(msg) > 0:
                    su_trace.logger.error('Warning(s) on sheet {0} ({1}) :'.format(sheet_name, std_sheet_name))
                    for _ in msg.split('\n'):
                        if len(_) > 0:
                            su_trace.logger.error(' - {}'.format(_))
            # Auto-compute missing flux
            if std_sheet_name in [CONST.IO_SHEET, CONST.TER_SHEET, CONST.DATA_SHEET, CONST.RESULTS_SHEET]:
                ok = sankey.autocompute_missing_flux()
                if not ok:
                    return False, ''
            # Ok node parsing
            prev_mfa_entry_name.append(std_sheet_name)
    # Synchronize all nodes levels
    sankey.autocompute_nodes_levels()
    # if sankey.has_at_least_one_mat_balance():
    #     # Compute mat balance
    sankey.autocompute_mat_balance()
    # else:
    #     # Recompute mat_balance only if it was specified for at least one node
    #     su_trace.logger.info('Matter balance was not specified in entry file, no computing.')

    # Overall coherence checks
    if do_coherence_checks:
        su_trace.logger.info('Overall coherence checks on Sankey structure')
        ok = sankey.check_overall_sankey_coherence()
        if not ok:
            return False, 'Sankey structure is not coherent. Abort.'
    # End
    return True, ''


def check_sheets_before_reading(sheet_names):
    """
    Verify if there are enough sheets for parsing.

    Parameters
    ----------
    :param sheet_names: Input file worksheet dict as {reference sheet name: user sheet names}.
    :type sheet_names: dict

    Returns
    -------
    :return: (Success ; Error message )
    :rtype: (bool; string)
    """
    # With a data sheet, there is enough data to structure the Sankey
    if CONST.DATA_SHEET in sheet_names.keys():
        return True, 'OK - Data sheet'
    # No data sheet -> Do we have a Node sheet ?
    if CONST.NODES_SHEET in sheet_names.keys():
        return True, 'OK - Node sheet'
    # No Node sheet -> Do we have Product & Sector sheets ?
    if (CONST.PRODUCTS_SHEET in sheet_names.keys()) and \
       (CONST.SECTORS_SHEET in sheet_names.keys()):
        return True, 'OK - Products & Sectors sheets'
    # No product & sector sheets -> Do we have an IO sheet ?
    if (CONST.IO_SHEET in sheet_names.keys()):
        return True, 'OK - IO sheets'
    # No IO sheet -> Do we have a TER sheet ?
    if CONST.TER_SHEET in sheet_names.keys():
        return True, 'OK - TER sheet'
    # Not enough sheets
    err_msg = "Not enough sheets. To create the Sankey, we need at least one of these sheets: \n"
    err_msg += _allowedSheetNames([CONST.DATA_SHEET, CONST.NODES_SHEET, CONST.IO_SHEET, CONST.TER_SHEET])
    err_msg += "Or all these sheets instead : \n"
    err_msg += _allowedSheetNames([CONST.PRODUCTS_SHEET, CONST.SECTORS_SHEET])
    return False, err_msg
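

# Illustrative usage sketch (not part of the original wheel): a workbook with
# only a data sheet is enough to build the Sankey structure.
def _demo_check_sheets_before_reading():
    ok, msg = check_sheets_before_reading({CONST.DATA_SHEET: ['flux data']})
    assert ok and msg == 'OK - Data sheet'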


def xl_read_tags_sheet(
    tags_sheet: pd.DataFrame,
    sankey: Sankey
):
    '''
    Read the tags sheet.

    Parameters
    ----------
    :param tags_sheet: Excel sheet to read.
    :type tags_sheet: pd.DataFrame

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message )
    :rtype: (bool; string)
    '''
    # Keep only the first columns. Clean the remaining empty right columns.
    for i, col in enumerate(tags_sheet.columns):  # iterate over column names
        if 'Unnamed' in col:
            tags_sheet.drop(tags_sheet.columns[i:], inplace=True, axis=1)
            break
    # Standardize the column names according to the dictionary if they belong
    # to it; otherwise also look them up among the node tags
    tags_sheet.columns = list(map(lambda x: consistantColName(CONST.TAG_SHEET, x, sankey), tags_sheet.columns))
    # Waiting for these columns
    # Obligatory columns to have in tags sheet, with their default type
    oblig_columns = {CONST.TAG_NAME: '', CONST.TAG_TYPE: '', CONST.TAG_TAGS: ''}
    # Check if we have at least the obligatory columns
    ok, err_msg = _checkNeededColumns(tags_sheet.columns, oblig_columns.keys(), CONST.TAG_SHEET)
    if not ok:
        return ok, err_msg
    # Facultative columns we can have, with default value
    facul_columns = {CONST.TAG_IS_PALETTE: 0, CONST.TAG_COLORMAP: '', CONST.TAG_COLOR: ''}
    # Check if we need to add facultative columns
    for facul_column_name in facul_columns.keys():
        if facul_column_name not in tags_sheet.columns:
            tags_sheet[facul_column_name] = facul_columns[facul_column_name]
    # Convert data to the specified types
    ok, msg = _castColumnType(
        tags_sheet, dict(oblig_columns, **facul_columns),
        empty_to_default_value=True)
    if not ok:
        return ok, msg
    # Update Sankey
    return sankey.update_from_tags_table(tags_sheet)


def xl_read_data_sheet(
    data_sheet: pd.DataFrame,
    options: dict,
    sankey: Sankey
):
    '''
    Read the data sheet.

    Parameters
    ----------
    :param data_sheet: Excel sheet to read.
    :type data_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message )
    :rtype: (bool; string)
    '''
    # Set column headers consistent with the specified column names for the data sheet
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.DATA_SHEET, x, sankey, tags),
            data_sheet.columns))
    # Waiting for these columns
    # Obligatory columns to have in data sheet, with their default type
    oblig_columns = {
        CONST.DATA_ORIGIN: '',
        CONST.DATA_DESTINATION: '',
    }
    # Check if we have the mandatory columns (Origin, destination, values)
    ok, msg = _checkNeededColumns(new_columns_names, list(oblig_columns.keys()), CONST.DATA_SHEET)
    if not ok:
        return ok, msg
    # Ok to update column names with consistent names
    data_sheet.columns = new_columns_names
    # Facultative columns we can have, with default value
    facul_columns = {
        CONST.DATA_VALUE: 0.,
        CONST.DATA_QUANTITY: 0.0,
        CONST.DATA_FACTOR: 0.0,
        CONST.DATA_UNCERT: 0.0}
    # Convert columns data to default data type or None if NaN
    ok, msg = _castColumnType(
        data_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update Sankey
    return sankey.update_from_data_table(
        data_sheet,
        options['warn_on_new_nodes'],
        options['warn_on_new_flux'])


def xl_read_nodes_sheet(
    nodes_sheet: pd.DataFrame,
    mfa_entry_name: str,
    options: dict,
    sankey: Sankey
):
    """
    Read the nodes sheet.

    Parameters
    ----------
    :param nodes_sheet: Excel sheet to read (dataframe).
    :type nodes_sheet: pd.DataFrame

    :param mfa_entry_name: Type of sheet to parse.
    :type mfa_entry_name: str

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message )
    :rtype: (bool; string)
    """
    # Standardize the column names according to the dictionary;
    # otherwise keep the column names unchanged
    tags = list(sankey.taggs[CONST.TAG_TYPE_NODE].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_LEVEL].keys())
    nodes_sheet.columns = list(
        map(lambda x: consistantColName(mfa_entry_name, x, sankey, tags),
            nodes_sheet.columns))
    # Waiting for these columns
    # Obligatory columns to have in nodes sheet, with their default type
    oblig_columns = {
        CONST.NODES_LEVEL: 0,
        CONST.NODES_NODE: ''}
    # Check if we have at least the obligatory columns
    ok, msg = _checkNeededColumns(nodes_sheet.columns, list(oblig_columns.keys()), mfa_entry_name)
    if not ok:
        return ok, msg
    # Facultative columns we can have, with default value
    facul_columns = {
        CONST.NODES_MAT_BALANCE: 1,
        CONST.NODES_SANKEY: 1,
        CONST.NODES_COLOR: '',
        CONST.NODES_DEFINITIONS: ''}
    # Convert to int, str, or None if NaN
    ok, msg = _castColumnType(
        nodes_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update Sankey
    return sankey.update_from_nodes_table(
        nodes_sheet,
        warn_on_new_nodes=options['warn_on_new_nodes'])


def xl_read_products_sectors_sheet(
    excel_sheet: pd.DataFrame,
    mfa_entry_name: str,
    options: dict,
    sankey: Sankey
):
    """
    Read either a Products, Sectors or Exchanges sheet.

    Parameters
    ----------
    :param excel_sheet: Excel sheet to read (dataframe).
    :type excel_sheet: pd.DataFrame

    :param mfa_entry_name: Type of sheet to parse.
    :type mfa_entry_name: str

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message )
    :rtype: (bool; string)
    """
    # Add tag column
    if mfa_entry_name == CONST.PRODUCTS_SHEET:
        excel_sheet[CONST.NODE_TYPE] = CONST.NODE_TYPE_PRODUCT
    elif mfa_entry_name == CONST.SECTORS_SHEET:
        excel_sheet[CONST.NODE_TYPE] = CONST.NODE_TYPE_SECTOR
    elif mfa_entry_name == CONST.EXCHANGES_SHEET:
        excel_sheet[CONST.NODE_TYPE] = CONST.NODE_TYPE_EXCHANGE
    # Read as a nodes sheet
    return xl_read_nodes_sheet(
        excel_sheet,
        mfa_entry_name,
        options,
        sankey)


def xl_read_terbase_sheet(
    ter_excel_sheet: pd.DataFrame,
    options: dict,
    mfa_dict: dict,
    sankey: Sankey
):
    """
    Read the TER sheet.

    Parameters
    ----------
    :param ter_excel_sheet: Excel sheet to read (dataframe).
    :type ter_excel_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param mfa_dict: Data struct for Sankey.
    :type mfa_dict: dict, modified

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message )
    :rtype: (bool; string)
    """
    # Extract all tables from sheet
    tables = []
    _extractTablesFromSheet(ter_excel_sheet, tables)
    if len(tables) != 2:
        err_msg = 'Could not find or extract the necessary two tables, found {}.\n'.format(len(tables))
        err_msg += 'Are all the tables here and correctly formatted ?'
        return False, err_msg
    # Do we have duplicated cols or rows ?
    for i, table in enumerate(tables):
        has_dup_cols, dup_cols = _hasDuplicatedEntry(table.columns.to_list())
        if has_dup_cols:
            _fuseDuplicatedColumns(table, dup_cols)
        has_dup_rows, dup_rows = _hasDuplicatedEntry(table.index.to_list())
        if has_dup_rows:
            _fuseDuplicatedRows(table, dup_rows)
    # Do we have the same columns and rows for each table ?
    has_missing_entry = False
    msg = ""
    sets_headers = [(set(table.index.to_list()), set(table.columns.to_list())) for table in tables]
    for i in range(len(sets_headers) - 1):
        diff_rows = sets_headers[i][0] - sets_headers[i+1][0]
        if len(diff_rows) > 0:
            has_missing_entry = True
            msg += 'Tables {0} and {1} have incompatible rows : {2}\n'.format(
                i, i+1, list(diff_rows))
        diff_cols = sets_headers[i][1] - sets_headers[i+1][1]
        if len(diff_cols) > 0:
            has_missing_entry = True
            msg += 'Tables {0} and {1} have incompatible columns : {2}\n'.format(
                i, i+1, list(diff_cols))
    if has_missing_entry:
        return False, msg
    # Separate tables
    table_supplies = tables[0]  # Defines flux Sectors->Products, with Cols=Sectors, Rows=Products
    table_uses = tables[1]  # Defines flux Products->Sectors, with Cols=Sectors, Rows=Products
    # In Sankey struct
    log = ''
    ok, msg = sankey.update_from_matrix_table(
        table_supplies.T.replace({np.nan: None}),
        warn_on_new_nodes=options['warn_on_new_nodes'],
        warn_on_new_flux=options['warn_on_new_flux'],
        tagg_name='Type de noeud',
        tagg_type=CONST.TAG_TYPE_NODE,
        tag_name_col=CONST.NODE_TYPE_PRODUCT,
        tag_name_row=CONST.NODE_TYPE_SECTOR)
    if not ok:
        err = 'Could not process supplies table : {}'.format(msg)
        return ok, err
    log += msg
    ok, msg = sankey.update_from_matrix_table(
        table_uses.replace({np.nan: None}),
        warn_on_new_nodes=options['warn_on_new_nodes'],
        warn_on_new_flux=options['warn_on_new_flux'],
        tagg_name='Type de noeud',
        tagg_type=CONST.TAG_TYPE_NODE,
        tag_name_col=CONST.NODE_TYPE_SECTOR,
        tag_name_row=CONST.NODE_TYPE_PRODUCT)
    log += msg
    if not ok:
        err = 'Could not process uses table : {}'.format(msg)
        return ok, err
    # Set MFA dict - Needed for retrocompatibility
    # Set 'x' and 'X' as 1
    table_uses.replace({'x': 1}, inplace=True)
    table_uses.replace({'X': 1}, inplace=True)
    table_supplies.replace({'x': 1}, inplace=True)
    table_supplies.replace({'X': 1}, inplace=True)
    # Default type = int
    _castColumnType(table_uses, 0, empty_to_default_value=True)
    _castColumnType(table_supplies, 0, empty_to_default_value=True)
    # Save in MFA dict
    mfa_dict[CONST.TER_SHEET] = {}
    mfa_dict[CONST.TER_SHEET]['use'] = table_uses
    mfa_dict[CONST.TER_SHEET]['supply'] = table_supplies
    return True, log


def xl_read_input_output_sheet(
    io_excel_sheet: pd.DataFrame,
    options: dict,
    mfa_input: dict,
    sankey: Sankey,
    read_data_in_matrix=False
):
    """
    Read the IO sheet.

    Parameters
    ----------
    :param io_excel_sheet: Excel sheet to read (dataframe).
    :type io_excel_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param mfa_input: Data struct for Sankey.
    :type mfa_input: dict, modified

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message )
    :rtype: (bool; string)
    """
    # Extract all tables from sheet
    tables = []
    _extractTablesFromSheet(io_excel_sheet, tables)
    if len(tables) != 1:
        err_msg = 'Did not find the correct number of tables. Need one table, found {}.'.format(len(tables))
        if len(tables) == 0:
            err_msg += '\nIs the table in the given sheet, and is it correctly formatted ?'
        return False, err_msg
    io_sheet = tables[0]
    # Do we have duplicated cols or rows ?
    has_dup_cols, dup_cols = _hasDuplicatedEntry(io_sheet.columns.to_list())
    if has_dup_cols:
        _fuseDuplicatedColumns(io_sheet, dup_cols)
    has_dup_rows, dup_rows = _hasDuplicatedEntry(io_sheet.index.to_list())
    if has_dup_rows:
        _fuseDuplicatedRows(io_sheet, dup_rows)
    # In Sankey struct
    ok, msg = sankey.update_from_matrix_table(
        io_sheet.replace({np.nan: None}),
        data_in_matrix=read_data_in_matrix,
        warn_on_new_nodes=options['warn_on_new_nodes'],
        warn_on_new_flux=options['warn_on_new_flux'])
    # Update MFA data dict - Needed for retrocompatibility
    # Set 'x' and 'X' as 1
    io_sheet.replace({'x': 1}, inplace=True)
    io_sheet.replace({'X': 1}, inplace=True)
    # Default type = int
    _castColumnType(io_sheet, 0, empty_to_default_value=False)
    # Save in MFA dict
    mfa_input[CONST.IO_SHEET] = io_sheet
    # Output
    return ok, msg


def xl_read_input_output_data_sheet(
    io_excel_sheet: pd.DataFrame,
    options: dict,
    mfa_input: dict,
    sankey: Sankey
):
    """
    Read the IO data sheet (an IO sheet whose matrix also carries data).

    Parameters
    ----------
    :param io_excel_sheet: Excel sheet to read (dataframe).
    :type io_excel_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param mfa_input: Data struct for Sankey.
    :type mfa_input: dict, modified

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message )
    :rtype: (bool; string)
    """
    return xl_read_input_output_sheet(
        io_excel_sheet,
        options,
        mfa_input,
        sankey,
        read_data_in_matrix=True)


def xl_read_min_max_sheet(
    min_max_sheet: pd.DataFrame,
    options: dict,
    sankey: Sankey
):
    """
    Read the CONST.MIN_MAX_SHEET.

    Parameters
    ----------
    :param min_max_sheet: Excel sheet to read.
    :type min_max_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message )
    :rtype: (bool; string)
    """
    # Set column headers consistent with tags groups
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.MIN_MAX_SHEET, x, sankey, tags),
            min_max_sheet.columns))
    # Waiting for these columns
    # Obligatory columns to have in min/max sheet, with their default type
    oblig_columns = {
        CONST.MIN_MAX_ORIGIN: '',
        CONST.MIN_MAX_DESTINATION: ''}
    # All columns are here ?
    ok, msg = _checkNeededColumns(new_columns_names, list(oblig_columns.keys()), CONST.MIN_MAX_SHEET)
    if not ok:
        return ok, msg
    # Ok to update column names with consistent names
    min_max_sheet.columns = new_columns_names
    # Facultative columns we can have, with default value
    facul_columns = {}
    for tag in tags:
        facul_columns[tag] = ''
    # Convert to int, str, or None if NaN
    ok, msg = _castColumnType(
        min_max_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update sankey struct
    ok, msg = sankey.update_from_min_max_table(
        min_max_sheet,
        options['warn_on_new_nodes'],
        options['warn_on_new_flux'])
    if not ok:
        return ok, msg
    return True, ''


def xl_read_constraints_sheet(
    constraints_sheet: pd.DataFrame,
    options: dict,
    sankey: Sankey
):
    """
    Read the CONST.CONSTRAINTS_SHEET.

    Parameters
    ----------
    :param constraints_sheet: Excel sheet to read.
    :type constraints_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message )
    :rtype: (bool; string)
    """
    # Set column headers consistent with tags groups
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.CONSTRAINTS_SHEET, x, sankey, tags),
            constraints_sheet.columns))
    # Waiting for these columns
    # Obligatory columns to have in constraints sheet, with their default type
    oblig_columns = {
        CONST.CONSTRAINT_ID: '',
        CONST.CONSTRAINT_ORIGIN: '',
        CONST.CONSTRAINT_DESTINATION: ''}
    onlyone_columns = {
        CONST.CONSTRAINT_EQ: 0.0,
        CONST.CONSTRAINT_INEQ_INF: 0.0,
        CONST.CONSTRAINT_INEQ_SUP: 0.0}
    # All columns are here ?
    ok, msg = _checkNeededColumns(
        new_columns_names,
        list(oblig_columns.keys()),
        CONST.CONSTRAINTS_SHEET,
        list(onlyone_columns.keys()))
    if not ok:
        return ok, msg
    # Ok to update column names with consistent names
    constraints_sheet.columns = new_columns_names
    # Facultative columns we can have, with default value
    facul_columns = {}
    for tag in tags:
        facul_columns[tag] = ''
    # Convert columns data to default data type or None if NaN
    ok, msg = _castColumnType(
        constraints_sheet, dict(oblig_columns, **onlyone_columns, **facul_columns))
    if not ok:
        return ok, msg
    ok, msg = sankey.update_from_constraints_table(
        constraints_sheet,
        options['warn_on_new_nodes'],
        options['warn_on_new_flux'])
    if not ok:
        return ok, msg
    return True, ''
1509
+
1510
+
1511
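+
+
+ # The onlyone_columns group above is passed as the fourth argument of
+ # _checkNeededColumns, which presumably enforces that at least one (perhaps
+ # exactly one) of the constraint-value columns (eq / ineq_inf / ineq_sup)
+ # appears in the sheet. A minimal standalone sketch of such a check, for
+ # illustration only and not taken from this module:
+ #
+ #   def _exactly_one_of(columns, candidates):
+ #       return sum(c in columns for c in candidates) == 1
+
+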
+ def xl_read_result_sheet(
+     result_sheet: pd.DataFrame,
+     sankey: Sankey
+ ):
+     """
+     Read result sheet.
+
+     Parameters
+     ----------
+     :param result_sheet: Excel sheet to read
+     :type result_sheet: pd.DataFrame
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success; Error message)
+     :rtype: (bool; string)
+     """
+     # Set column headers consistent with the specified column names for the data sheet
+     tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
+     tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
+     new_columns_names = list(
+         map(lambda x: consistantColName(CONST.RESULTS_SHEET, x, sankey, tags),
+             result_sheet.columns))
+     # Expected columns
+     # Mandatory columns for this sheet, with their default type
+     oblig_columns = {
+         CONST.RESULTS_ORIGIN: '',
+         CONST.RESULTS_DESTINATION: '',
+         CONST.RESULTS_VALUE: 0.}
+     # Check that we have the mandatory columns (origin, destination, value)
+     ok, msg = _checkNeededColumns(
+         new_columns_names,
+         list(oblig_columns.keys()),
+         CONST.RESULTS_SHEET)
+     if not ok:
+         return ok, msg
+     # OK to update column names with the consistent names
+     result_sheet.columns = new_columns_names
+     # Optional columns we can have, with their default value
+     facul_columns = {
+         CONST.RESULTS_FREE_MIN: 0.0,
+         CONST.RESULTS_FREE_MAX: 0.0}
+     # Convert columns data to their default data type, or None if NaN
+     ok, msg = _castColumnType(
+         result_sheet, dict(oblig_columns, **facul_columns))
+     if not ok:
+         return ok, msg
+     # Update Sankey
+     return sankey.update_from_result_table(result_sheet)
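+
+
+ # Sketch of the casting convention relied on above. The real _castColumnType
+ # helper is defined earlier in this module; this outline is an assumption
+ # about its behaviour: each listed column is coerced to the type of its
+ # default value, and empty cells become None.
+ #
+ #   def _cast_sketch(sheet, defaults):
+ #       for col, default in defaults.items():
+ #           if col in sheet.columns:
+ #               sheet[col] = sheet[col].apply(
+ #                   lambda v: None if pd.isna(v) else type(default)(v))
+
+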
+ def xl_read_analysis_sheet(
+     analysis_sheet: pd.DataFrame,
+     mfa_dict: dict,
+     sankey: Sankey
+ ):
+     """
+     Read Analysis sheet.
+
+     Parameters
+     ----------
+     :param analysis_sheet: Excel sheet to read
+     :type analysis_sheet: pd.DataFrame
+
+     :param mfa_dict: MFA data after parsing
+     :type mfa_dict: dict, modified
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success; Error message)
+     :rtype: (bool; string)
+     """
+     # Set column headers consistent with tag groups
+     tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
+     tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
+     new_columns_names = list(
+         map(lambda x: consistantColName(CONST.ANALYSIS_SHEET, x, sankey, tags),
+             analysis_sheet.columns))
+     # Expected columns
+     # Mandatory columns for this sheet, with their default type
+     oblig_columns = {
+         CONST.RESULTS_ORIGIN: '',
+         CONST.RESULTS_DESTINATION: '',
+         CONST.RESULTS_VALUE: 0.0}
+     # Are all mandatory columns present?
+     ok, msg = _checkNeededColumns(
+         new_columns_names,
+         list(oblig_columns.keys()),
+         CONST.ANALYSIS_SHEET)
+     if not ok:
+         return ok, msg
+     # OK to update column names with the consistent names
+     analysis_sheet.columns = new_columns_names
+     # Optional columns we can have, with their default value
+     facul_columns = {}
+     for tag in tags:
+         facul_columns[tag] = ''
+     # Convert columns data to their default data type, or None if NaN
+     ok, msg = _castColumnType(
+         analysis_sheet, dict(oblig_columns, **facul_columns))
+     if not ok:
+         return ok, msg
+     # Update Sankey - analysis part
+     ok, msg = sankey.update_from_analysis_table(analysis_sheet)
+     if not ok:
+         return ok, msg
+     # Update MFA data dict
+     mfa_dict[CONST.ANALYSIS_SHEET] = analysis_sheet
+     return True, ''
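+
+
+ # Unlike the min/max and constraints readers, this one also stores the
+ # renamed and cast DataFrame in the MFA dict. Illustrative call; the
+ # surrounding analysis_sheet and sankey objects are assumed to exist:
+ #
+ #   mfa_dict = {}
+ #   ok, msg = xl_read_analysis_sheet(analysis_sheet, mfa_dict, sankey)
+ #   # on success, mfa_dict[CONST.ANALYSIS_SHEET] is the processed sheet
+
+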
+ def xl_read_uncertainty_sheet(
+     uncertainty_sheet: pd.DataFrame,
+     mfa_dict: dict,
+     sankey: Sankey
+ ):
+     """
+     Read CONST.UNCERTAINTY_SHEET.
+
+     Parameters
+     ----------
+     :param uncertainty_sheet: Excel sheet to read
+     :type uncertainty_sheet: pd.DataFrame
+
+     :param mfa_dict: MFA data after parsing
+     :type mfa_dict: dict, modified
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success; Error message)
+     :rtype: (bool; string)
+     """
+     # Drop trailing empty columns (pandas names them 'Unnamed: N')
+     for i, col in enumerate(uncertainty_sheet.columns):
+         if 'Unnamed' in col:
+             uncertainty_sheet.drop(uncertainty_sheet.columns[i:], inplace=True, axis=1)
+             break
+     # Set column headers consistent with tag groups
+     tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
+     tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
+     new_columns_names = list(
+         map(lambda x: consistantColName(CONST.UNCERTAINTY_SHEET, x, sankey, tags),
+             uncertainty_sheet.columns))
+     # Expected columns
+     # Mandatory columns for this sheet, with their default type
+     oblig_columns = {
+         CONST.UNCERTAINTY_ORIGIN: '',
+         CONST.UNCERTAINTY_DESTINATION: ''}
+     # Are all mandatory columns present?
+     ok, msg = _checkNeededColumns(
+         new_columns_names,
+         list(oblig_columns.keys()),
+         CONST.UNCERTAINTY_SHEET)
+     if not ok:
+         return ok, msg
+     # OK to update column names with the consistent names
+     uncertainty_sheet.columns = new_columns_names
+     # Optional columns we can have, with default value and default position in sheet
+     facul_columns = {}
+     facul_column_pos = 2
+     for _ in CONST.UNCERTAINTY_SHEET_COLS:
+         facul_columns['{}'.format(_)] = {'val': 0.0, 'pos': facul_column_pos}
+         facul_column_pos += 1
+     for tag in tags:
+         facul_columns[tag] = {'val': '', 'pos': facul_column_pos}
+         facul_column_pos += 1
+     # Add any missing optional column at its default position
+     for facul_column_name, facul_column in facul_columns.items():
+         if facul_column_name not in uncertainty_sheet.columns:
+             uncertainty_sheet.insert(
+                 facul_column['pos'], facul_column_name, facul_column['val'])
+     # Convert to int, str, or None if NaN
+     ok, msg = _castColumnType(
+         uncertainty_sheet,
+         dict(oblig_columns, **facul_columns),
+         empty_to_default_value=True)
+     if not ok:
+         return ok, msg
+     # Update Sankey - uncertainty part
+     ok, msg = sankey.update_from_uncertainty_table(
+         uncertainty_sheet)
+     if not ok:
+         return ok, msg
+     mfa_dict[CONST.UNCERTAINTY_SHEET] = uncertainty_sheet
+     return True, ''
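+
+
+ # Note on the optional-column handling above: pandas.DataFrame.insert adds a
+ # column in place at a given integer position, which is what keeps the sheet
+ # layout stable when a column is missing. A self-contained illustration with
+ # made-up column names:
+ #
+ #   df = pd.DataFrame({'origin': ['a'], 'destination': ['b']})
+ #   df.insert(2, 'mean', 0.0)   # -> columns: origin, destination, mean
+
+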
+ def xl_read_conversions_sheet(
+     conversions_sheet: pd.DataFrame,
+     mfa_dict: dict,
+     sankey: Sankey
+ ):
+     """
+     Read CONST.CONVERSIONS_SHEET.
+     TODO this sheet must be changed.
+
+     Parameters
+     ----------
+     :param conversions_sheet: Excel sheet to read
+     :type conversions_sheet: pd.DataFrame
+
+     :param mfa_dict: MFA data after parsing
+     :type mfa_dict: dict, modified
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success; Error message)
+     :rtype: (bool; string)
+     """
+     # Set column headers consistent with tag groups
+     new_columns_names = list(
+         map(lambda x: consistantColName(CONST.CONVERSIONS_SHEET, x, sankey),
+             conversions_sheet.columns))
+     # Expected columns
+     # Mandatory columns for this sheet, with their default type
+     oblig_columns = {
+         CONST.CONVERSIONS_LOCATION: '',
+         CONST.CONVERSIONS_PRODUCT: '',
+         CONST.CONVERSIONS_NATURAL_UNIT: '',
+         CONST.CONVERSIONS_FACTOR: 0.0}
+     # Are all mandatory columns present?
+     ok, msg = _checkNeededColumns(new_columns_names, list(oblig_columns.keys()), CONST.CONVERSIONS_SHEET)
+     if not ok:
+         return ok, msg
+     # OK to update column names with the consistent names
+     conversions_sheet.columns = new_columns_names
+     # # Facultative columns we can have, with default value
+     # facul_columns = {
+     #     CONST.CONVERSIONS_FACTOR_INV: 0.0}
+     # # Convert columns data to default data type or None if NaN
+     # ok, msg = _castColumnType(
+     #     conversions_sheet.iloc[1:], dict(oblig_columns, **facul_columns))
+     # if not ok:
+     #     return ok, msg
+     conversions_sheet.replace({np.nan: None}, inplace=True)
+     # Update Sankey - conversions part
+     nodes = []  # filled by the callee with the nodes that received conversion data
+     ok, msg = sankey.update_from_conversions_table(conversions_sheet, nodes)
+     if not ok:
+         return ok, msg
+     # Update MFA data dict
+     nodes2tooltips = {}
+     nodes2units_conv = {}
+     nodes2natural_unit = {}
+     for node in nodes:
+         for localisation in node.unit.keys():
+             name = localisation + '/' + node.name
+             node2tooltips = []
+             node2units_conv = [1.0]
+             for tooltip in sankey.tooltips.keys():
+                 if tooltip in node.tooltips.keys():
+                     node2tooltips.append(node.tooltips[tooltip].content)
+                 else:
+                     node2tooltips.append(None)
+             for unit in sankey.units.keys():
+                 other_factors = node.get_other_factors(localisation)
+                 try:
+                     node2units_conv.append(other_factors[unit])
+                 except Exception:
+                     node2units_conv.append(None)
+             nodes2tooltips[name] = node2tooltips
+             nodes2units_conv[name] = node2units_conv
+             nodes2natural_unit[name] = node.get_natural_unit(localisation)
+     mfa_dict[CONST.CONVERSIONS_SHEET] = {
+         'tooltip_names': [[name, desc] for name, desc in sankey.tooltips.items()],
+         'units_names': [[name, desc] for name, desc in sankey.units.items()],
+         'nodes2tooltips': nodes2tooltips,
+         'nodes2units_conv': nodes2units_conv,
+         'nodes2natural_unit': nodes2natural_unit}
+     return True, ''
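+
+
+ # Shape of the entry built above, keyed by 'localisation/node' names, with
+ # made-up values for illustration only:
+ #
+ #   mfa_dict[CONST.CONVERSIONS_SHEET] = {
+ #       'tooltip_names': [['Source', 'Data source of the node']],
+ #       'units_names': [['kt', 'kilotonnes']],
+ #       'nodes2tooltips': {'France/Wheat': ['FAO']},
+ #       'nodes2units_conv': {'France/Wheat': [1.0, 0.35]},
+ #       'nodes2natural_unit': {'France/Wheat': 't'}}
+
+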
+ def write_excel_from_sankey(
+     excel_filename: str,
+     sankey: Sankey,
+     mode: str = 'a',
+     sheets_to_remove__names: list = [],
+     **kwargs
+ ):
+     """
+     Write a Sankey structure to an Excel file.
+
+     Parameters
+     ----------
+     :param excel_filename: Name of the Excel file to write
+     :type excel_filename: str
+
+     :param sankey: Sankey structure to write to the Excel file
+     :type sankey: Sankey
+
+     Optional parameters
+     -------------------
+     :param mode: Writing mode (see pandas.ExcelWriter for more details)
+     :type mode: str, optional (defaults to 'a')
+
+     :param sheets_to_remove__names: List of sheets (by name) to remove from the Excel file if present
+     :type sheets_to_remove__names: list[str, ...], optional (defaults to [])
+
+     Hidden parameters
+     -----------------
+     :param additional_sheets: Dict of tables (pandas.DataFrame) to add to the Excel file
+     :type additional_sheets: Dict{str: pandas.DataFrame}
+     """
+     # Post-process function
+     def _post_process_excel_file(
+         excel_file
+     ):
+         # Extract excel book
+         excel = excel_file.book
+         sheets = excel._sheets  # bound here so it also exists when nothing is removed
+         # Remove sheets
+         for sheet_to_remove__name in sheets_to_remove__names:
+             try:
+                 sheet_to_remove__id = sheets.index(excel[sheet_to_remove__name])
+                 sheets.pop(sheet_to_remove__id)
+             except Exception:
+                 pass
+         # The READ ME sheet must always be the first sheet
+         try:
+             read_me_sheet__id = excel.worksheets.index(excel['READ ME'])
+             sheet = sheets.pop(read_me_sheet__id)
+             sheets.insert(0, sheet)
+         except Exception:
+             pass
+     # The file used to be opened and saved with xlwings to activate the formulas.
+     # if has_xl_wings:
+     #     try:
+     #         app = xl.App(visible=False)
+     #         book = app.books.open(excel_filename)
+     #         book.save()
+     #         app.kill()
+     #     except Exception:
+     #         pass
+     # Write sheets from sankey
+     if mode == 'a':
+         with pd.ExcelWriter(excel_filename, engine='openpyxl', mode=mode, if_sheet_exists='replace') as excel_file:
+             sankey.write_in_excel_file(excel_file, **kwargs)
+             _post_process_excel_file(excel_file)
+     else:
+         with pd.ExcelWriter(excel_filename, engine='openpyxl', mode=mode) as excel_file:
+             sankey.write_in_excel_file(excel_file, **kwargs)
+             _post_process_excel_file(excel_file)
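+
+
+ # Illustrative call (file and sheet names are made up): append to an existing
+ # workbook, replacing sheets that already exist, dropping a draft sheet, and
+ # passing an extra table through **kwargs to Sankey.write_in_excel_file:
+ #
+ #   write_excel_from_sankey(
+ #       'reconciled.xlsx', sankey, mode='a',
+ #       sheets_to_remove__names=['Draft'],
+ #       additional_sheets={'Notes': pd.DataFrame({'note': ['checked']})})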