SankeyExcelParser 1.0.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. SankeyExcelParser/__init__.py +0 -0
  2. SankeyExcelParser/io_excel.py +1867 -0
  3. SankeyExcelParser/io_excel_constants.py +811 -0
  4. SankeyExcelParser/sankey.py +3138 -0
  5. SankeyExcelParser/sankey_utils/__init__.py +0 -0
  6. SankeyExcelParser/sankey_utils/data.py +1118 -0
  7. SankeyExcelParser/sankey_utils/excel_source.py +31 -0
  8. SankeyExcelParser/sankey_utils/flux.py +344 -0
  9. SankeyExcelParser/sankey_utils/functions.py +278 -0
  10. SankeyExcelParser/sankey_utils/node.py +340 -0
  11. SankeyExcelParser/sankey_utils/protos/__init__.py +0 -0
  12. SankeyExcelParser/sankey_utils/protos/flux.py +84 -0
  13. SankeyExcelParser/sankey_utils/protos/node.py +386 -0
  14. SankeyExcelParser/sankey_utils/protos/sankey_object.py +135 -0
  15. SankeyExcelParser/sankey_utils/protos/tag_group.py +95 -0
  16. SankeyExcelParser/sankey_utils/sankey_object.py +165 -0
  17. SankeyExcelParser/sankey_utils/table_object.py +37 -0
  18. SankeyExcelParser/sankey_utils/tag.py +95 -0
  19. SankeyExcelParser/sankey_utils/tag_group.py +206 -0
  20. SankeyExcelParser/su_trace.py +239 -0
  21. SankeyExcelParser/tests/integration/__init__.py +0 -0
  22. SankeyExcelParser/tests/integration/test_base.py +356 -0
  23. SankeyExcelParser/tests/integration/test_run_check_input.py +100 -0
  24. SankeyExcelParser/tests/integration/test_run_conversions.py +96 -0
  25. SankeyExcelParser/tests/integration/test_run_load_input.py +94 -0
  26. SankeyExcelParser/tests/unit/__init__.py +0 -0
  27. SankeyExcelParser-1.0.0b0.data/scripts/run_parse_and_write_excel.py +155 -0
  28. SankeyExcelParser-1.0.0b0.data/scripts/run_parse_excel.py +115 -0
  29. SankeyExcelParser-1.0.0b0.dist-info/METADATA +113 -0
  30. SankeyExcelParser-1.0.0b0.dist-info/RECORD +32 -0
  31. SankeyExcelParser-1.0.0b0.dist-info/WHEEL +5 -0
  32. SankeyExcelParser-1.0.0b0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1867 @@
+ """
+ This module is dedicated to the conversion from outside formats to the internal json format.
+ Outside formats may be: a workbook (excel), another json file, a database, etc.
+ Structure and specifications of the internal json format are defined in this module. The internal
+ json format can take two main forms: one to address input information and a second one
+ for output communications.
+ """
+
+ # External libs -----------------------------------------------------
+ import pandas as pd
+ import numpy as np
+ import re
+
+ # Local libs -------------------------------------------------------
+ import SankeyExcelParser.io_excel_constants as CONST
+ import SankeyExcelParser.su_trace as su_trace
+
+ # External modules -------------------------------------------------
+ from unidecode import unidecode
+
+ # Local modules -----------------------------------------------------
+ from SankeyExcelParser.sankey import Sankey, UserExcelConverter
+
+ # has_xl_wings = True
+ # try:
+ #     # import xlwings as xl
+ #     import pythoncom
+ #     pythoncom.CoInitialize()
+ # except Exception:
+ #     has_xl_wings = False
+
+
+ # Private functions ----------------------------------------------------------------
+ def _compareStrings(
+     string_in: str,
+     string_ref: str,
+     strip_input_string=False
+ ):
+     """
+     Uniformize strings for easier comparison.
+
+     Parameters
+     ----------
+     :param string_in: String to compare.
+     :type string_in: str
+
+     :param string_ref: Reference string (regular expression) to compare with.
+     :type string_ref: str
+
+     :param strip_input_string: Strip spaces at start / end of the input string.
+     :type strip_input_string: boolean, optional (default=False)
+
+     Returns
+     -------
+     :return: True if strings mean the same thing, False otherwise
+     :rtype: bool
+     """
+     s1 = string_in.lower()
+     s2 = string_ref.lower()
+     if strip_input_string:
+         s1 = s1.strip()
+     return (re.fullmatch(unidecode(s2), unidecode(s1)) is not None)
+
+
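For reference, a minimal stand-alone sketch (not part of the package) of the comparison semantics above: matching is case-insensitive, accent-insensitive thanks to unidecode, and the reference string acts as a regular expression.

```python
# Sketch of _compareStrings semantics; runnable on its own.
import re
from unidecode import unidecode

def compare(string_in, string_ref, strip=True):
    s1, s2 = string_in.lower(), string_ref.lower()
    if strip:
        s1 = s1.strip()
    # The reference string is used as a regex pattern, accents removed
    return re.fullmatch(unidecode(s2), unidecode(s1)) is not None

assert compare('  Élevage ', 'elevage')          # accents and spaces ignored
assert compare('produit 12', r'produit [0-9]+')  # reference acts as a regex
assert not compare('secteur', 'produit.*')
```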
+ def _consistantColName(
+     sheet_name: str,
+     usr_col_name: str,
+     xl_names_converter: UserExcelConverter,
+     tags: list = []
+ ):
+     '''
+     Test if the usr_col_name is consistent with the allowed columns list.
+
+     Parameters
+     ----------
+     :param sheet_name: Sheet name to check.
+     :type sheet_name: string
+
+     :param usr_col_name: Column to find.
+     :type usr_col_name: string
+
+     :param xl_names_converter: Keeps track of the mapping between user names and standard names.
+     :type xl_names_converter: UserExcelConverter
+
+     :param tags: Tags list to check.
+     :type tags: list
+
+     Returns
+     -------
+     :return:
+         (Found?, Standard name).
+         If the column corresponds to an entry in the sheet name dictionary, then the result is the corresponding key.
+         If the column is a tag column / an additional column, the result is the standard format of the column name.
+     :rtype: (bool, string)
+     '''
+     # Check if Sheet is about data
+     if _compareStrings(sheet_name, 'flux data', strip_input_string=True):
+         xl_names_converter.add_new_col(sheet_name, CONST.DATA_SHEET, usr_col_name)
+         return True, CONST.DATA_SHEET
+     sheet_name_lower = sheet_name.lower()
+     usr_col_name_lower = usr_col_name.lower()
+     if sheet_name_lower != '' and usr_col_name_lower != '':
+         # Is the proposed column a tag column?
+         for tag in tags:
+             if _compareStrings(usr_col_name_lower, tag, strip_input_string=True):
+                 return True, tag
+         # Is the proposed column in allowed columns?
+         for std_col_name in CONST.DICT_OF_COLS_NAMES__RE[sheet_name_lower].keys():
+             for allowed_col_re in CONST.DICT_OF_COLS_NAMES__RE[sheet_name_lower][std_col_name]:
+                 if _compareStrings(usr_col_name_lower, allowed_col_re, strip_input_string=True):
+                     xl_names_converter.add_new_col(sheet_name_lower, std_col_name, usr_col_name)
+                     return True, std_col_name
+     return False, usr_col_name
+
+
+ def _consistantSheetName(
+     usr_sheet_name: str,
+     xl_names_converter: UserExcelConverter,
+ ):
+     '''
+     Test if the usr_sheet_name is consistent with the allowed sheets list.
+
+     Parameters
+     ----------
+     :param usr_sheet_name: Sheet name to check.
+     :type usr_sheet_name: string
+
+     :param xl_names_converter: Keeps track of the mapping between user names and standard names.
+     :type xl_names_converter: UserExcelConverter
+
+     Returns
+     -------
+     :return:
+         - out1: True if the tested sheet is consistent.
+         - out2: The dictionary key corresponding to the allowed list found, if the tested sheet is consistent.
+           The list of allowed sheet names otherwise.
+     :rtype: (bool, string)
+
+     Notes
+     -----
+     - If the usr_sheet_name input is empty ('') the result is a list of
+       allowed sheet names as a string.
+     - A particular case is taken into account for proxy input files which
+       usually have 3 proxy sheets (one of them with the 'sector' keyword in its name).
+     '''
+     # Check if Sheet is about data
+     if _compareStrings(usr_sheet_name, 'flux data', strip_input_string=True):
+         xl_names_converter.add_new_sheet(CONST.DATA_SHEET, usr_sheet_name)
+         return True, CONST.DATA_SHEET
+     # If we have a sheet to check
+     if usr_sheet_name != '':
+         # Is sheet in list of possible names for sheets
+         for std_sheet_name in CONST.DICT_OF_SHEET_NAMES__RE.keys():
+             for allow_sheet_re in CONST.DICT_OF_SHEET_NAMES__RE[std_sheet_name]:
+                 if _compareStrings(usr_sheet_name, allow_sheet_re, strip_input_string=True):
+                     xl_names_converter.add_new_sheet(std_sheet_name, usr_sheet_name)
+                     return True, std_sheet_name
+     # We didn't find the corresponding key
+     return False, _allowedSheetNames()
+
+
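To make the lookup above concrete, here is an illustrative sketch with a toy dictionary (the real patterns live in io_excel_constants.DICT_OF_SHEET_NAMES__RE): resolution returns the standard key of the first regex that matches the normalized user sheet name.

```python
# Toy illustration of the sheet-name resolution; the dict content is hypothetical.
import re
from unidecode import unidecode

DICT_OF_SHEET_NAMES__RE = {      # assumed example content, not the real constants
    'nodes': ['noeuds?', 'nodes?'],
    'products': ['produits?', 'products?'],
}

def resolve_sheet(usr_sheet_name):
    normalized = unidecode(usr_sheet_name.lower().strip())
    for std_name, patterns in DICT_OF_SHEET_NAMES__RE.items():
        for pattern in patterns:
            if re.fullmatch(unidecode(pattern), normalized):
                return True, std_name
    return False, usr_sheet_name

print(resolve_sheet(' Noeud '))   # (True, 'nodes')
print(resolve_sheet('Produits'))  # (True, 'products')
```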
+ def _allowedSheetNames(sheets_to_show=[]):
+     '''
+     Return the table of allowed sheet names with respect to their type of information.
+
+     Parameters
+     ----------
+     :param sheets_to_show: List of sheets to print. If the list is empty, print all.
+     :type sheets_to_show: list, optional, default=[]
+
+     Returns
+     -------
+     :return: Formatted table of sheet types and their possible sheet names.
+     :rtype: string
+     '''
+     wcol1 = 30
+     wcol2 = 70
+     # Create table header
+     list_allowed = '{0: <{w1}} | {1: <{w2}}\n'.format("Sheet type", "Possible sheet names", w1=wcol1, w2=wcol2)
+     list_allowed += '-'*(wcol1 + wcol2 + 3) + '\n'
+     # Keys to show = table first column
+     if len(sheets_to_show) > 0:
+         list_dict_keys = [_ for _ in sheets_to_show if _ in CONST.DICT_OF_SHEET_NAMES.keys()]
+     else:
+         list_dict_keys = CONST.DICT_OF_SHEET_NAMES.keys()
+     # Create table
+     for dict_key in list_dict_keys:
+         list_allowed += '{: <{w}} | '.format(dict_key, w=wcol1)
+         if len(CONST.DICT_OF_SHEET_NAMES[dict_key]) != 0:
+             list_allowed += ', '.join(set(CONST.DICT_OF_SHEET_NAMES[dict_key]))
+         list_allowed += '\n'
+     return list_allowed
+
+
+ def _checkNeededColumns(
+     columns: list,
+     columns_needed: list,
+     sheet_name: str,
+     columns_needed_onlyone: list = []
+ ):
+     """
+     Check that a sheet provides all the required columns.
+
+     Parameters
+     ----------
+     :param columns: Current list of columns
+     :type columns: list
+
+     :param columns_needed: List of columns to have
+     :type columns_needed: list
+
+     :param sheet_name: Sheet name from which to check names
+     :type sheet_name: str
+
+     :param columns_needed_onlyone: List of columns in which at least one is needed
+     :type columns_needed_onlyone: list
+
+     Returns
+     -------
+     :return: (Success?, Log message)
+     :rtype: (bool, str)
+     """
+     # Check needed columns
+     for column_needed in columns_needed:
+         if not (column_needed in columns):
+             err_msg = 'The \"{}\" column is missing '.format(column_needed)
+             err_msg += 'or does not have the correct name.\n'
+             err_msg += '\n'
+             err_msg += '{}\n'.format(CONST.DICT_OF_COMMENTS[sheet_name][column_needed][0])
+             err_msg += '\n'
+             std_column_names = set(CONST.DICT_OF_COLS_NAMES[sheet_name][column_needed])
+             err_msg += 'Acceptable names for this column : {}'.format(
+                 ', '.join(['\"{}\"'.format(_) for _ in std_column_names]))
+             return False, err_msg
+     # Check optional columns (need only one of them)
+     if len(columns_needed_onlyone) > 0:
+         if (not any(np.in1d(columns_needed_onlyone, columns))):
+             err_msg = 'A mandatory column is missing or does not have the correct name.\n'
+             err_msg += 'At least one of these columns must be present : {}'.format(
+                 ', '.join(['\"{}\"'.format(_) for _ in columns_needed_onlyone]))
+             return False, err_msg
+     return True, ''
+
+
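A quick stand-alone illustration of the "at least one of" check used above: np.in1d returns a boolean mask telling which of the needed names are present.

```python
# Mirrors the optional-columns check in _checkNeededColumns.
import numpy as np

columns = ['origin', 'destination', 'value']
needed_onlyone = ['value', 'quantity', 'factor']
print(any(np.in1d(needed_onlyone, columns)))  # True: 'value' is present
```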
+ def _castColumnType(
+     sheet: pd.DataFrame,
+     columns_types,
+     empty_to_default_value=False
+ ):
+     """
+     Cast specific columns to their expected types / default values.
+
+     Parameters
+     ----------
+     :param sheet: Sheet to modify.
+     :type sheet: pandas.DataFrame, modified
+
+     :param columns_types: Dict of columns and their default types/values OR any default value.
+     :type columns_types: any
+
+     :param empty_to_default_value: If True, fill empty cells with the default value, otherwise set them to None.
+     :type empty_to_default_value: bool
+
+     Returns
+     -------
+     :return: (Success? ; Log message)
+     :rtype: (bool, str)
+     """
+     # Filter columns to convert / Columns that are in sheet
+     if type(columns_types) is dict:
+         cols_to_convert = \
+             [(_, columns_types[_]) for _ in columns_types.keys() if _ in sheet.columns]
+     else:
+         cols_to_convert = \
+             [(_, columns_types) for _ in sheet.columns]
+     # Convert
+     for (col, _) in cols_to_convert:
+         try:
+             # Special type
+             if type(_) is dict:
+                 val = _['val']
+             else:
+                 val = _
+             # Convert as string
+             if type(val) is str:
+                 sheet[col] = sheet[col].replace({np.nan: 'None'})
+                 sheet[col] = sheet[col].astype(str)
+                 if empty_to_default_value:
+                     sheet[col] = sheet[col].replace({'None': val})
+                 else:
+                     sheet[col] = sheet[col].replace({'None': None})
+             # Convert as float
+             elif type(val) is float:
+                 sheet[col] = sheet[col].astype(float)
+                 if empty_to_default_value:
+                     sheet[col] = sheet[col].replace({np.nan: val})
+                 else:
+                     sheet[col] = sheet[col].replace({np.nan: None})
+             # Convert as int (NaN cannot be cast to int, so a sentinel value marks empty cells)
+             elif type(val) is int:
+                 sheet[col] = sheet[col].replace({np.nan: -702313053})
+                 sheet[col] = sheet[col].astype(int)
+                 if empty_to_default_value:
+                     sheet[col] = sheet[col].replace({-702313053: val})
+                 else:
+                     sheet[col] = sheet[col].replace({-702313053: None})
+             # Convert to other types
+             else:
+                 sheet[col] = sheet[col].astype(type(val))
+         except Exception:
+             err = 'Column \"{}\" contains values '.format(col)
+             err += 'that could not be read as {} values'.format(type(val))
+             return False, err
+     # Replace remaining empty data with None
+     sheet.replace({np.nan: None}, inplace=True)
+     return True, ''
+
+
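A usage sketch of the caster above, assuming the module is importable. Note how empty cells follow empty_to_default_value, and how integer columns transit through the NaN sentinel.

```python
import numpy as np
import pandas as pd
from SankeyExcelParser.io_excel import _castColumnType  # private helper, shown for illustration

df = pd.DataFrame({'node': ['a', np.nan], 'level': [1.0, np.nan]})
ok, msg = _castColumnType(df, {'node': '', 'level': 0}, empty_to_default_value=True)
print(ok, msg)    # True ''
print(df.dtypes)  # node -> object (str), level -> int64
print(df)         # empty cells filled with '' and 0
```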
+ def _pd_sorted_col(
+     dft: pd.DataFrame,
+     lico: list
+ ):
+     """
+     Sort the columns of a dataframe according to a reference column list.
+
+     Parameters
+     ----------
+     :param dft: Input dataframe to sort.
+     :type dft: pandas.DataFrame
+
+     :param lico: Ordered list of columns to have.
+     :type lico: list
+
+     Returns
+     -------
+     :return: Sorted dataframe.
+     :rtype: pandas.DataFrame
+     """
+     li_df = list(dft)
+     if li_df != lico:
+         dftm = pd.DataFrame(columns=lico)
+         for col in lico:
+             dftm[col] = dft[col]
+         return dftm
+     return dft
+
+
+ def _extractTablesFromSheet(
+     sheet: pd.DataFrame,
+     new_sheets: list,
+     default_columns_names=None
+ ):
+     """
+     Extract all tables from an excel sheet.
+
+     Ex: Extract tables from a sheet like this
+
+     +----+----+----+----+----+
+     | -  | -  | -  | -  | -  |
+     +----+----+----+----+----+
+     | -  | -  | C1 | C2 | C3 |
+     +----+----+----+----+----+
+     | -  | R1 | x  | -  | x  |
+     +----+----+----+----+----+
+     | -  | R2 | x  | x  | -  |
+     +----+----+----+----+----+
+     | -  | -  | -  | -  | -  |
+     +----+----+----+----+----+
+     | -  | -  | C4 | C5 | C6 |
+     +----+----+----+----+----+
+     | -  | R3 | -  | x  | x  |
+     +----+----+----+----+----+
+     | -  | R4 | x  | -  | -  |
+     +----+----+----+----+----+
+
+     Or like this
+
+     +----+----+----+----+----+
+     | -  | -  | -  | -  | -  |
+     +----+----+----+----+----+
+     | -  | -  | C1 | C2 | C3 |
+     +----+----+----+----+----+
+     | -  | R1 | x  | -  | x  |
+     +----+----+----+----+----+
+     | -  | R2 | x  | x  | -  |
+     +----+----+----+----+----+
+     | -  | -  | -  | -  | -  |
+     +----+----+----+----+----+
+     | -  | R3 | -  | x  | x  |
+     +----+----+----+----+----+
+     | -  | R4 | x  | -  | -  |
+     +----+----+----+----+----+
+
+     Parameters
+     ----------
+     :param sheet: Sheet to parse
+     :type sheet: pd.DataFrame
+
+     :param new_sheets: List of tables extracted from the sheet
+     :type new_sheets: list(pd.DataFrame), modified
+
+     Returns
+     -------
+     :return: True if the extraction succeeded, False otherwise.
+     :rtype: bool
+     """
+     # Nothing to do
+     if sheet.empty:
+         return True
+     # If we don't have any default column name -> read column index
+     # -> Useful if first row is composed of node names
+     # -> Need to get rid of "Unnamed" cols
+     # -> Then, if nodes are mentioned in more than one column, pandas adds a '.x' (x a number)
+     #    at the end of the node name, so we need to get rid of that too...
+     if default_columns_names is None:
+         default_columns_names = []
+         for _ in sheet.columns:
+             if isinstance(_, str):
+                 if (re.fullmatch('Unnamed:.*', _) is None):
+                     end_ = re.search('([.][0-9]+)\Z', _)  # noqa: W605
+                     if end_ is not None:
+                         default_columns_names.append(_[:-len(end_[0])])
+                     else:
+                         default_columns_names.append(_)
+     # Need to reindex sheet to correctly use enumerated index and columns
+     sheet = sheet.reset_index(drop=True)
+     sheet = sheet.T.reset_index(drop=True).T
+     # ----------------- Initialize starting and ending points
+     start_row = 0
+     start_col = 0
+     index_col = 0  # Column number for index names
+     end_row = sheet.shape[0]
+     end_col = sheet.shape[1]
+     # ---------------- Find starting point
+     found_starting_point = False
+     for row in range(sheet.shape[0]):
+         for col in range(sheet.shape[1]):
+             # Check if current val is NaN (empty cell)
+             val = sheet.iat[row, col]
+             is_nan = (val != val)
+             # If not -> Bingo
+             found_starting_point = (not is_nan)
+             if found_starting_point:
+                 start_row = row
+                 start_col = col
+                 index_col = col
+                 break
+         if found_starting_point:
+             break
+     # ------------ Check table format with upper left corner
+     upper_left_corner = sheet.iloc[start_row:min(start_row+2, end_row), start_col:min(start_col+2, end_col)]
+     # Not enough data in given sheet -> stop?
+     if (upper_left_corner.shape[0] < 2):
+         # Modify starting row to avoid missing a table with only one line
+         start_row = max(0, start_row-1)
+         upper_left_corner = sheet.iloc[start_row:min(start_row+2, end_row), start_col:min(start_col+2, end_col)]
+     if (upper_left_corner.shape[1] < 2):
+         # Modify starting col to avoid missing a table with only one col
+         start_col = max(0, start_col-1)
+         index_col = start_col
+         upper_left_corner = sheet.iloc[start_row:min(start_row+2, end_row), start_col:min(start_col+2, end_col)]
+     if (upper_left_corner.shape[0] < 2) or (upper_left_corner.shape[1] < 2):
+         # Ok, table does not contain any data
+         return True
+     # Upper left corner is an isolated value?
+     v1 = upper_left_corner.iloc[0, 1]
+     v2 = upper_left_corner.iloc[1, 0]
+     if (v1 != v1) and (v2 != v2):
+         # Retry but without the isolated value
+         sheet_copy = sheet.copy()  # copy to be sure that we don't modify the original sheet
+         sheet_copy.iloc[start_row, start_col] = np.nan
+         return _extractTablesFromSheet(sheet_copy, new_sheets, default_columns_names=default_columns_names)
+     # First row is an overhead?
+     if (not _isValueAcceptedInMatrixTable(upper_left_corner.iloc[1, 1])):
+         # Retry but without the overhead row
+         sheet_copy = sheet.copy()  # copy to be sure that we don't modify the original sheet
+         sheet_copy.iloc[start_row, start_col:end_col] = np.nan
+         return _extractTablesFromSheet(sheet_copy, new_sheets, default_columns_names=default_columns_names)
+     # Check if the content of first row = column names
+     columns_names = None
+     # Check what the upper left corner of the table contains
+     # In all cases: 'val' can be 'x', 'X' or some stringified float value.
+     # Case 1 : upper left corner = ['R1', 'val' / NaN]
+     #                              ['R2', 'val' / NaN]
+     #          -> 'val' and NaN can be turned into float.
+     # Case 2 : upper left corner = ['C1', 'C2']
+     #                              ['val' / NaN, 'val' / NaN]
+     #          -> On first row, column names can not be turned into float
+     #          -> On first col, 'val' and NaN can be turned into float
+     # Case 3 : upper left corner = ['table name', 'C1']
+     #                              ['R1', 'val' / NaN]
+     #          -> On first row, neither table name nor column names can be turned into float
+     #          -> On first col, no row name can be turned into float
+     if _isValueAcceptedInMatrixTable(upper_left_corner.iloc[0, 1]):
+         case = 1
+     else:
+         if _isValueAcceptedInMatrixTable(upper_left_corner.iloc[1, 0]):
+             case = 2
+         else:
+             case = 3
+     # Check in which case we are
+     if (case == 1):
+         # Case 1 -> need to use default columns names
+         columns_names = default_columns_names
+         # Start col is one col on the right, because first col is index names
+         start_col = min(start_col+1, end_col)
+         # Ending col is easy to find
+         end_col = min(start_col + len(columns_names), end_col)
+     if (case == 2):
+         # Case 2 -> There are column names on the first row
+         columns_names = sheet.astype('str').iloc[start_row, start_col:].to_list()
+         # Start row is one row below & index col is one col before
+         start_row = min(start_row+1, end_row)
+         index_col = max(0, index_col-1)
+     if (case == 3):
+         # Case 3 -> There are column names on the first row, but starting one col on the right
+         columns_names = sheet.astype('str').iloc[start_row, (start_col+1):].to_list()
+         # Start row is one row below & index col does not change, and start col is one col on the right
+         start_row = min(start_row+1, end_row)
+         start_col = min(start_col+1, end_col)
+     if (case == 2) or (case == 3):
+         # Case 2 & 3 : Find ending col
+         for (i, col_name) in enumerate(columns_names):
+             # Check if current col name is NaN (empty cell)
+             is_nan = (col_name != col_name)
+             # If nan -> Bingo
+             if is_nan:
+                 end_col = min(start_col + i, end_col)
+                 columns_names = columns_names[:i]
+                 break
+     # No default column name was provided -> Error
+     if columns_names is None:
+         return False
+     # ------------ Check what first col contains
+     index_names = sheet.iloc[start_row:end_row, index_col].to_list()
+     # ------------- Find ending row
+     for (i, index_name) in enumerate(index_names):
+         # Check if current val is NaN (empty cell)
+         is_nan = (index_name != index_name)
+         # If nan -> Bingo
+         if is_nan:
+             end_row = min(i + start_row, end_row)
+             index_names = index_names[:i]
+             break
+     # New table
+     new_table = sheet.iloc[start_row:end_row, start_col:end_col]
+     if len(new_table.columns) != len(columns_names):
+         su_trace.logger.error('Could not read TER table')
+         return False
+     new_table.columns = [_.strip() if (type(_) is str) else _ for _ in columns_names]
+     new_table.index = [_.strip() if (type(_) is str) else _ for _ in index_names]
+     new_sheets.append(new_table)
+     # Find other tables if needed
+     ok = True
+     ok &= _extractTablesFromSheet(
+         sheet.iloc[:, end_col:], new_sheets,
+         default_columns_names=columns_names)  # Upper right missing part of sheet
+     ok &= _extractTablesFromSheet(
+         sheet.iloc[end_row:, :], new_sheets,
+         default_columns_names=columns_names)  # Lower missing part of sheet
+     # TODO: review how table remainders are split in the recursion
+     return ok
+
+
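A usage sketch of the extractor, assuming the module is importable; the toy layout mirrors the second docstring example (two tables stacked in one sheet, the lower one reusing the upper one's column names would also work).

```python
import numpy as np
import pandas as pd
from SankeyExcelParser.io_excel import _extractTablesFromSheet  # private helper, shown for illustration

raw = pd.DataFrame([
    [np.nan, 'C1',   'C2'],
    ['R1',   'x',    np.nan],
    ['R2',   'x',    'x'],
    [np.nan, np.nan, np.nan],   # empty row separates the two tables
    [np.nan, 'C4',   'C5'],
    ['R3',   np.nan, 'x'],
])
tables = []
ok = _extractTablesFromSheet(raw, tables)
print(ok, len(tables))  # expect: True 2
print(tables[0])        # 2x2 table with columns C1/C2, index R1/R2
```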
+ def _isValueAcceptedInMatrixTable(value):
+     """
+     In a matrix table, accepted values are NaN, numbers and 'x' or 'X'.
+
+     Parameters
+     ----------
+     :param value: Value to test
+     :type value: Any
+
+     Returns
+     -------
+     :return: True if value is OK, else False
+     :rtype: boolean
+     """
+     # First check if value is a number or NaN
+     # by trying to convert it to float
+     try:
+         float(value)
+         return True
+     except (TypeError, ValueError):
+         # If it fails, then it's not NaN or a number,
+         # but it can still be either 'x' or 'X'
+         OK_but_not_a_number = '[xX]'
+         if (re.fullmatch(OK_but_not_a_number, str(value)) is not None):
+             return True
+     return False
+
+
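A quick behavioral sketch of the accepted-value rule:

```python
import numpy as np
from SankeyExcelParser.io_excel import _isValueAcceptedInMatrixTable

for v in [np.nan, 3.5, '2.0', 'x', 'X', 'label']:
    print(repr(v), _isValueAcceptedInMatrixTable(v))
# expected: True for everything except 'label'
```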
+ def _hasDuplicatedEntry(entries: list):
+     """
+     Check a list for duplicated entries.
+
+     :param entries: List of entries to check.
+     :type entries: list
+
+     :return: (Any duplicates?, dict mapping each duplicated entry to its positions)
+     :rtype: (bool, dict)
+     """
+     duplicates = {}
+     for (i, entry) in enumerate(entries):
+         if entries.count(entry) > 1:
+             if entry not in duplicates.keys():
+                 duplicates[entry] = []
+             duplicates[entry].append(i)
+     # duplicates = [entry for entry in entries if entries.count(entry) > 1]
+     return (len(duplicates) > 0), duplicates
+
+
+ def _fuseDuplicatedColumns(table: pd.DataFrame, dup_cols: dict):
+     """Fuse duplicated columns of a table (values are taken from the first occurrence)."""
+     # Get current columns names
+     new_columns_names = table.columns.to_list()
+     new_tables = {}
+     # For each duplicated column, get the column name and positions of duplicates
+     for (col_name, cols_index) in dup_cols.items():
+         # Fuse columns
+         new_tables[col_name] = table.loc[:, col_name].apply(lambda row: row.values[0], axis=1)
+         # Rename duplicated columns, except the first one
+         for (i, col_index) in enumerate(cols_index):
+             if i == 0:
+                 continue
+             new_columns_names[col_index] = col_name+'_dup'
+     # Set new columns names
+     table.columns = new_columns_names
+     # Drop and replace
+     for (col_name, sub_table) in new_tables.items():
+         # Drop the renamed columns (except the first one)
+         table.drop(columns=(col_name+'_dup'), inplace=True)
+         # Apply the fused data on the remaining column
+         table[col_name] = sub_table
+
+
+ def _fuseDuplicatedRows(table: pd.DataFrame, dup_rows: dict):
+     """Fuse duplicated rows of a table (values are taken from the first occurrence)."""
+     # Get current index names
+     new_index_names = table.index.to_list()
+     new_tables = {}
+     # For each duplicated row, get the row name and positions of duplicates
+     for (row_name, rows_index) in dup_rows.items():
+         # Fuse rows
+         new_tables[row_name] = table.loc[row_name, :].apply(lambda col: col.values[0], axis=0)
+         # Rename duplicated rows, except the first one
+         for (i, row_index) in enumerate(rows_index):
+             if i == 0:
+                 continue
+             new_index_names[row_index] = row_name+'_dup'
+     # Set new index names
+     table.index = new_index_names
+     # Drop and replace
+     for (row_name, sub_table) in new_tables.items():
+         # Drop the renamed rows (except the first one)
+         table.drop(index=(row_name+'_dup'), inplace=True)
+         # Apply the fused data on the remaining row
+         table.loc[row_name, :] = sub_table
+
+
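Sketch of the fusing step: for each group of duplicated headers, the first occurrence's values win and the later occurrences are dropped.

```python
import pandas as pd
from SankeyExcelParser.io_excel import _fuseDuplicatedColumns, _hasDuplicatedEntry

t = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'A'])
has_dup, dups = _hasDuplicatedEntry(t.columns.to_list())  # {'A': [0, 2]}
if has_dup:
    _fuseDuplicatedColumns(t, dups)
print(t)  # single 'A' column, values taken from the first 'A'
```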
+ # PUBLIC FUNCTIONS ----------------------------------------------------------------
+ def consistantSheetName(
+     usr_sheet_name: str,
+     sankey: Sankey
+ ):
+     '''
+     Test if the usr_sheet_name is consistent with the allowed sheets list.
+
+     Parameters
+     ----------
+     usr_sheet_name : string
+         Sheet name to check.
+
+     Returns
+     -------
+     string
+         The dictionary key corresponding to the allowed list found.
+         If the tested sheet is not consistent, the list of allowed sheet names instead.
+
+     Notes
+     -----
+     - If the usr_sheet_name input is empty ('') the result is a list of
+       allowed sheet names as a string.
+     - A particular case is taken into account for proxy input files which
+       usually have 3 proxy sheets (one of them with the 'sector' keyword in its name).
+     '''
+     _, res = _consistantSheetName(usr_sheet_name, sankey.xl_user_converter)
+     return res
+
+
+ def consistantColName(
+     sheet_name: str,
+     prop_col: str,
+     sankey: Sankey,
+     tags: list = []
+ ):
+     '''
+     Test if the prop_col is consistent with the allowed columns list.
+
+     Parameters
+     ----------
+     :param sheet_name: Sheet name to check.
+     :type sheet_name: string
+
+     :param prop_col: Column to find.
+     :type prop_col: string
+
+     :param tags: Tags list to check.
+     :type tags: list
+
+     Returns
+     -------
+     :return:
+         If the column corresponds to an entry in the sheet name dictionary, then the result is the corresponding key.
+         If the column is a tag column / an additional column, the result is the standard format of the column name.
+     :rtype: string
+     '''
+     _, res = _consistantColName(
+         sheet_name,
+         prop_col,
+         sankey.xl_user_converter,
+         tags)
+     return res
+
+
+ def load_sankey_from_excel_file(
+     input_file: str,
+     sankey: Sankey,
+     do_coherence_checks: bool = False,
+     sheet_to_remove_names: list = None,
+ ):
+     '''
+     Main converter routine. Calls the dedicated routine depending on the input type.
+     Uses the global variable 'su_trace' to trace the file processing.
+
+     Parameters
+     ----------
+     :param input_file: Input file name to load (with extension and path)
+     :type input_file: string
+
+     :param sankey: Data struct as a Sankey object
+     :type sankey: Sankey, modified
+
+     :param do_coherence_checks: Do we trigger coherence checks on the sankey structure?
+     :type do_coherence_checks: bool
+
+     :param sheet_to_remove_names: List of sheets that will be rewritten or removed when re-exported as excel
+     :type sheet_to_remove_names: list, modified, optional (default=None)
+
+     Returns
+     -------
+     :return: (Success ; Error message)
+     :rtype: (bool; string)
+     '''
+     # Read excel input
+     excel_file = pd.ExcelFile(input_file)
+     # If everything went fine, get sheet names
+     excel_sheet_names = excel_file.sheet_names
+     # Keep only consistent sheets
+     necessary_sheet_names = {}
+     unconsistant_sheet_names = []
+     use_sheet_to_remove_names = True
+     if type(sheet_to_remove_names) is not list:
+         use_sheet_to_remove_names = False
+     for sheet_name in excel_sheet_names:
+         # Get sheet reference name for given sheet name
+         is_sheet_consistant, sheet_refkey = _consistantSheetName(sheet_name, sankey.xl_user_converter)
+         if is_sheet_consistant:  # Got the reference name
+             if sheet_refkey not in necessary_sheet_names:
+                 necessary_sheet_names[sheet_refkey] = [sheet_name]
+             else:
+                 necessary_sheet_names[sheet_refkey].append(sheet_name)
+         else:  # No reference name found
+             unconsistant_sheet_names.append(sheet_name)
+     # Check if we got some sheets to process
+     if len(necessary_sheet_names.keys()) == 0:
+         err_msg = "We didn't find any sheet name as specified in the following table : \n"
+         err_msg += _allowedSheetNames()
+         return False, err_msg
+     # Debug log
+     su_trace.logger.debug('Names of excel sheets that will be processed : ')
+     [su_trace.logger.debug('- {}'.format(_)) for _ in necessary_sheet_names.values()]
+     if len(unconsistant_sheet_names) > 0:
+         su_trace.logger.debug('Names of excel sheets that will be ignored : ')
+         [su_trace.logger.debug('- {}'.format(_)) for _ in unconsistant_sheet_names]
+     if use_sheet_to_remove_names:
+         if len(sheet_to_remove_names) > 0:
+             su_trace.logger.debug('Names of excel sheets that will be removed : ')
+             [su_trace.logger.debug('- {}'.format(_)) for _ in sheet_to_remove_names]
+     # Update struct
+     return _read_sankey_from_excel_book(
+         input_file,
+         necessary_sheet_names,
+         sankey,
+         do_coherence_checks=do_coherence_checks)
+
+
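A hedged end-to-end usage sketch: 'my_workbook.xlsx' is a placeholder path, and we assume here that Sankey can be constructed without arguments.

```python
from SankeyExcelParser.sankey import Sankey
from SankeyExcelParser.io_excel import load_sankey_from_excel_file

sankey = Sankey()  # assumption: no-argument constructor
ok, err_msg = load_sankey_from_excel_file(
    'my_workbook.xlsx',  # hypothetical input workbook
    sankey,
    do_coherence_checks=True)
if not ok:
    print(err_msg)
```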
+ def _read_sankey_from_excel_book(
+     excel_file_name: str,
+     sheet_names: dict,
+     sankey: Sankey,
+     do_coherence_checks: bool = False
+ ):
+     """
+     Parse all sheets from an excel book to create a sankey struct.
+
+     Parameters
+     ----------
+     :param excel_file_name: Name of the input excel file
+     :type excel_file_name: str
+
+     :param sheet_names: Input file worksheet dict as [reference sheet name: user sheet names]
+     :type sheet_names: dict
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     :param do_coherence_checks: Do we trigger coherence checks on the sankey structure?
+     :type do_coherence_checks: bool
+
+     Returns
+     -------
+     :return: (Success ; Error message)
+     :rtype: (bool; string)
+     """
+     # TODO : useless but I keep it for now
+     mfa_dict = {}
+     # Verify that we have the minimum number of sheets
+     ok, msg = check_sheets_before_reading(sheet_names)
+     if not ok:
+         return ok, msg
+     # First create standardized node type tags if needed
+     for _ in (CONST.PRODUCTS_SHEET, CONST.SECTORS_SHEET, CONST.EXCHANGES_SHEET):
+         if _ in sheet_names.keys():
+             sankey.get_or_create_tagg(
+                 CONST.NODE_TYPE,
+                 CONST.TAG_TYPE_NODE,
+                 ':'.join([
+                     CONST.NODE_TYPE_PRODUCT,
+                     CONST.NODE_TYPE_SECTOR,
+                     CONST.NODE_TYPE_EXCHANGE]))
+             break
+     # Then check all other TAGS
+     if CONST.TAG_SHEET in sheet_names.keys():
+         # Read tags
+         for tag_sheet_name in sheet_names[CONST.TAG_SHEET]:
+             su_trace.logger.info('Reading sheet {}'.format(tag_sheet_name))
+             ok, msg = xl_read_tags_sheet(pd.read_excel(excel_file_name, tag_sheet_name), sankey)
+             if not ok:
+                 return ok, "Error on sheet {0} ({1}) : {2}".format(tag_sheet_name, CONST.TAG_SHEET, msg)
+             # Log warning messages
+             if len(msg) > 0:
+                 su_trace.logger.error('Warning(s) on sheet {0} ({1}) :'.format(tag_sheet_name, CONST.TAG_SHEET))
+                 for _ in msg.split('\n'):
+                     if len(_) > 0:
+                         su_trace.logger.error(' - {}'.format(_))
+     # Then check nodes, but in this order
+     options = {}
+     options['warn_on_new_nodes'] = False
+     options['warn_on_new_flux'] = False
+     prev_mfa_entry_name = []
+     sheets_processing_order = [
+         (CONST.NODES_SHEET, xl_read_nodes_sheet, [CONST.NODES_SHEET, options, sankey]),
+         (CONST.PRODUCTS_SHEET, xl_read_products_sectors_sheet, [CONST.PRODUCTS_SHEET, options, sankey]),
+         (CONST.SECTORS_SHEET, xl_read_products_sectors_sheet, [CONST.SECTORS_SHEET, options, sankey]),
+         (CONST.EXCHANGES_SHEET, xl_read_products_sectors_sheet, [CONST.EXCHANGES_SHEET, options, sankey]),
+         (CONST.IO_SHEET, xl_read_input_output_sheet, [options, mfa_dict, sankey]),
+         (CONST.TER_SHEET, xl_read_terbase_sheet, [options, mfa_dict, sankey]),
+         (CONST.DATA_SHEET, xl_read_data_sheet, [options, sankey]),
+         (CONST.IO_DATA_SHEET, xl_read_input_output_data_sheet, [options, mfa_dict, sankey]),
+         (CONST.MIN_MAX_SHEET, xl_read_min_max_sheet, [options, sankey]),
+         (CONST.CONSTRAINTS_SHEET, xl_read_constraints_sheet, [options, sankey]),
+         (CONST.RESULTS_SHEET, xl_read_result_sheet, [sankey]),
+         # (CONST.ANALYSIS_SHEET, xl_read_analysis_sheet, [mfa_dict, sankey]),
+         (CONST.UNCERTAINTY_SHEET, xl_read_uncertainty_sheet, [mfa_dict, sankey]),
+         (CONST.CONVERSIONS_SHEET, xl_read_conversions_sheet, [mfa_dict, sankey])
+     ]
+     # Process all sheets in the correct order if they exist
+     for (std_sheet_name, extract_function, args) in sheets_processing_order:
+         if std_sheet_name in sheet_names.keys():
+             # Warn on new node creation
+             if (not options['warn_on_new_nodes']) and (len(prev_mfa_entry_name) > 0):
+                 options['warn_on_new_nodes'] = \
+                     (CONST.NODES_SHEET in prev_mfa_entry_name) or \
+                     (CONST.IO_SHEET in prev_mfa_entry_name) or \
+                     (CONST.TER_SHEET in prev_mfa_entry_name)
+                 options['warn_on_new_nodes'] |= \
+                     (CONST.PRODUCTS_SHEET in prev_mfa_entry_name) and \
+                     (CONST.SECTORS_SHEET in prev_mfa_entry_name) and \
+                     (std_sheet_name != CONST.EXCHANGES_SHEET)
+             # Warn on new flux creation
+             if (not options['warn_on_new_flux']) and (len(prev_mfa_entry_name) > 0):
+                 options['warn_on_new_flux'] = \
+                     (CONST.IO_SHEET in prev_mfa_entry_name) or \
+                     (CONST.TER_SHEET in prev_mfa_entry_name) or \
+                     (CONST.DATA_SHEET in prev_mfa_entry_name)
+             # User sheet name
+             for sheet_name in sheet_names[std_sheet_name]:
+                 # Extract sheet
+                 excel_sheet = pd.read_excel(excel_file_name, sheet_name)
+                 # If nothing inside -> continue
+                 nb_rows = excel_sheet.shape[0]
+                 if nb_rows < 1:
+                     continue
+                 # Parse
+                 su_trace.logger.info('Reading sheet {}'.format(sheet_name))
+                 ok, msg = extract_function(excel_sheet, *args)
+                 if not ok:
+                     return ok, "Error on sheet {0} ({1}) : {2}".format(sheet_name, std_sheet_name, msg)
+                 # Log warning messages
+                 if len(msg) > 0:
+                     su_trace.logger.error('Warning(s) on sheet {0} ({1}) :'.format(sheet_name, std_sheet_name))
+                     for _ in msg.split('\n'):
+                         if len(_) > 0:
+                             su_trace.logger.error(' - {}'.format(_))
+             # Auto-compute missing flux
+             if std_sheet_name in [CONST.IO_SHEET, CONST.TER_SHEET, CONST.DATA_SHEET, CONST.RESULTS_SHEET]:
+                 ok = sankey.autocompute_missing_flux()
+                 if not ok:
+                     return False, ''
+             # Ok node parsing
+             prev_mfa_entry_name.append(std_sheet_name)
+     # Synchronize all nodes levels
+     sankey.autocompute_nodes_levels()
+     # if sankey.has_at_least_one_mat_balance():
+     # Compute mat balance
+     sankey.autocompute_mat_balance()
+     # else:
+     #     # Recompute mat_balance only if it was specified for at least one node
+     #     su_trace.logger.info('Matter balance was not specified in entry file, no computing.')
+
+     # Overall coherence checks
+     if do_coherence_checks:
+         su_trace.logger.info('Overall coherence checks on Sankey structure')
+         ok = sankey.check_overall_sankey_coherence()
+         if not ok:
+             return False, 'Sankey structure is not coherent. Abort.'
+     # End
+     return True, ''
+
+
+ def check_sheets_before_reading(sheet_names):
+     """
+     Verify that there are enough sheets for parsing.
+
+     Parameters
+     ----------
+     :param sheet_names: Input file worksheet dict as [reference sheet name: user sheet names]
+     :type sheet_names: dict
+
+     Returns
+     -------
+     :return: (Success ; Error message)
+     :rtype: (bool; string)
+     """
+     # With a data sheet, there is enough data to structure the Sankey
+     if CONST.DATA_SHEET in sheet_names.keys():
+         return True, 'OK - Data sheet'
+     # No data sheet -> Do we have a Node sheet?
+     if CONST.NODES_SHEET in sheet_names.keys():
+         return True, 'OK - Node sheet'
+     # No Node sheet -> Do we have Product & Sector sheets?
+     if (CONST.PRODUCTS_SHEET in sheet_names.keys()) and \
+        (CONST.SECTORS_SHEET in sheet_names.keys()):
+         return True, 'OK - Products & Sectors sheets'
+     # No Product & Sector sheets -> Do we have an IO sheet?
+     if (CONST.IO_SHEET in sheet_names.keys()):
+         return True, 'OK - IO sheets'
+     # No IO sheet -> Do we have a TER sheet?
+     if CONST.TER_SHEET in sheet_names.keys():
+         return True, 'OK - TER sheet'
+     # Not enough sheets
+     err_msg = "Not enough sheets. To create the Sankey, we need at least one of these sheets: \n"
+     err_msg += _allowedSheetNames([CONST.DATA_SHEET, CONST.NODES_SHEET, CONST.IO_SHEET, CONST.TER_SHEET])
+     err_msg += "Or all these sheets instead : \n"
+     err_msg += _allowedSheetNames([CONST.PRODUCTS_SHEET, CONST.SECTORS_SHEET])
+     return False, err_msg
+
+
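A behavior sketch of the minimum-sheet rule; the dictionary keys are the standard sheet names resolved earlier.

```python
import SankeyExcelParser.io_excel_constants as CONST
from SankeyExcelParser.io_excel import check_sheets_before_reading

print(check_sheets_before_reading({CONST.DATA_SHEET: ['flux data']}))
# (True, 'OK - Data sheet')
print(check_sheets_before_reading({}))
# (False, <message listing the accepted sheet names>)
```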
+ def xl_read_tags_sheet(
+     tags_sheet: pd.DataFrame,
+     sankey: Sankey
+ ):
+     '''
+     Read tags sheet.
+
+     Parameters
+     ----------
+     :param tags_sheet: Excel sheet to read
+     :type tags_sheet: pd.DataFrame
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success ; Error message)
+     :rtype: (bool; string)
+     '''
+     # Keep only the first columns. Clean the remaining empty right columns.
+     for i, col in enumerate(tags_sheet.columns):  # iterate on column names
+         if 'Unnamed' in col:
+             tags_sheet.drop(tags_sheet.columns[i:], inplace=True, axis=1)
+             break
+     # Standardize the column names according to the dictionary; if a name is
+     # not part of the dictionary, also search for it among the node tags
+     tags_sheet.columns = list(map(lambda x: consistantColName(CONST.TAG_SHEET, x, sankey), tags_sheet.columns))
+     # Waiting for these columns
+     # Obligatory columns to have in tags sheet, with their default type
+     oblig_columns = {CONST.TAG_NAME: '', CONST.TAG_TYPE: '', CONST.TAG_TAGS: ''}
+     # Check if we have at least the obligatory columns
+     ok, err_msg = _checkNeededColumns(tags_sheet.columns, oblig_columns.keys(), CONST.TAG_SHEET)
+     if not ok:
+         return ok, err_msg
+     # Facultative columns we can have, with default value
+     facul_columns = {CONST.TAG_IS_PALETTE: 0, CONST.TAG_COLORMAP: '', CONST.TAG_COLOR: ''}
+     # Check if we need to add facultative columns
+     for facul_column_name in facul_columns.keys():
+         if facul_column_name not in tags_sheet.columns:
+             tags_sheet[facul_column_name] = facul_columns[facul_column_name]
+     # Convert data to the expected types
+     ok, msg = _castColumnType(
+         tags_sheet, dict(oblig_columns, **facul_columns),
+         empty_to_default_value=True)
+     if not ok:
+         return ok, msg
+     # Update Sankey
+     return sankey.update_from_tags_table(tags_sheet)
+
+
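For orientation, an illustrative toy tags sheet. The user-facing column headers shown here are assumptions: the accepted spellings are resolved through consistantColName against io_excel_constants, and the tag type value is likewise hypothetical.

```python
import pandas as pd

# Hypothetical tags sheet content, for illustration only
tags_sheet = pd.DataFrame({
    'Name': ['Year'],            # tag group name (assumed header)
    'Type': ['dataTag'],         # tag group type (assumed header and value)
    'Tags': ['2020:2021:2022'],  # ':'-separated tag values (assumed header)
})
# xl_read_tags_sheet(tags_sheet, sankey) would then register the 'Year'
# tag group with its three tags on the given Sankey object.
```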
+ def xl_read_data_sheet(
+     data_sheet: pd.DataFrame,
+     options: dict,
+     sankey: Sankey
+ ):
+     '''
+     Read data sheet.
+
+     Parameters
+     ----------
+     :param data_sheet: Excel sheet to read
+     :type data_sheet: pd.DataFrame
+
+     :param options: Dictionary of parsing options
+     :type options: dict
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success ; Error message)
+     :rtype: (bool; string)
+     '''
+     # Set column headers consistent with the specified column names for the data sheet
+     tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
+     tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
+     new_columns_names = list(
+         map(lambda x: consistantColName(CONST.DATA_SHEET, x, sankey, tags),
+             data_sheet.columns))
+     # Waiting for these columns
+     # Obligatory columns to have in data sheet, with their default type
+     oblig_columns = {
+         CONST.DATA_ORIGIN: '',
+         CONST.DATA_DESTINATION: '',
+     }
+     # Check if we have the mandatory columns (origin, destination)
+     ok, msg = _checkNeededColumns(new_columns_names, list(oblig_columns.keys()), CONST.DATA_SHEET)
+     if not ok:
+         return ok, msg
+     # Ok to update column names with consistent names
+     data_sheet.columns = new_columns_names
+     # Facultative columns we can have, with default value
+     facul_columns = {
+         CONST.DATA_VALUE: 0.,
+         CONST.DATA_QUANTITY: 0.0,
+         CONST.DATA_FACTOR: 0.0,
+         CONST.DATA_UNCERT: 0.0}
+     # Convert columns data to default data type, or None if NaN
+     ok, msg = _castColumnType(
+         data_sheet, dict(oblig_columns, **facul_columns))
+     if not ok:
+         return ok, msg
+     # Update Sankey
+     return sankey.update_from_data_table(
+         data_sheet,
+         options['warn_on_new_nodes'],
+         options['warn_on_new_flux'])
+
+
+ def xl_read_nodes_sheet(
+     nodes_sheet: pd.DataFrame,
+     mfa_entry_name: str,
+     options: dict,
+     sankey: Sankey
+ ):
+     """
+     Read nodes sheet.
+
+     Parameters
+     ----------
+     :param nodes_sheet: Excel sheet to read (dataframe)
+     :type nodes_sheet: pd.DataFrame
+
+     :param mfa_entry_name: Type of sheet to parse.
+     :type mfa_entry_name: str
+
+     :param options: Dictionary of parsing options.
+     :type options: dict
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success ; Error message)
+     :rtype: (bool; string)
+     """
+     # Standardize the column names according to the dictionary,
+     # otherwise keep the column names as they are
+     tags = list(sankey.taggs[CONST.TAG_TYPE_NODE].keys())
+     tags += list(sankey.taggs[CONST.TAG_TYPE_LEVEL].keys())
+     nodes_sheet.columns = list(
+         map(lambda x: consistantColName(mfa_entry_name, x, sankey, tags),
+             nodes_sheet.columns))
+     # Waiting for these columns
+     # Obligatory columns to have in nodes sheet, with their default type
+     oblig_columns = {
+         CONST.NODES_LEVEL: 0,
+         CONST.NODES_NODE: ''}
+     # Check if we have at least the obligatory columns
+     ok, msg = _checkNeededColumns(nodes_sheet.columns, list(oblig_columns.keys()), mfa_entry_name)
+     if not ok:
+         return ok, msg
+     # Facultative columns we can have, with default value
+     facul_columns = {
+         CONST.NODES_MAT_BALANCE: 1,
+         CONST.NODES_SANKEY: 1,
+         CONST.NODES_COLOR: '',
+         CONST.NODES_DEFINITIONS: ''}
+     # Convert to int, str, or None if NaN
+     ok, msg = _castColumnType(
+         nodes_sheet, dict(oblig_columns, **facul_columns))
+     if not ok:
+         return ok, msg
+     # Update Sankey
+     return sankey.update_from_nodes_table(
+         nodes_sheet,
+         warn_on_new_nodes=options['warn_on_new_nodes'])
+
+
+ def xl_read_products_sectors_sheet(
+     excel_sheet: pd.DataFrame,
+     mfa_entry_name: str,
+     options: dict,
+     sankey: Sankey
+ ):
+     """
+     Read either a Products, Sectors or Exchanges sheet.
+
+     Parameters
+     ----------
+     :param excel_sheet: Excel sheet to read (dataframe)
+     :type excel_sheet: pd.DataFrame
+
+     :param mfa_entry_name: Type of sheet to parse.
+     :type mfa_entry_name: str
+
+     :param options: Dictionary of parsing options.
+     :type options: dict
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success ; Error message)
+     :rtype: (bool; string)
+     """
+     # Add tag column
+     if mfa_entry_name == CONST.PRODUCTS_SHEET:
+         excel_sheet[CONST.NODE_TYPE] = CONST.NODE_TYPE_PRODUCT
+     elif mfa_entry_name == CONST.SECTORS_SHEET:
+         excel_sheet[CONST.NODE_TYPE] = CONST.NODE_TYPE_SECTOR
+     elif mfa_entry_name == CONST.EXCHANGES_SHEET:
+         excel_sheet[CONST.NODE_TYPE] = CONST.NODE_TYPE_EXCHANGE
+     # Read as nodes
+     return xl_read_nodes_sheet(
+         excel_sheet,
+         mfa_entry_name,
+         options,
+         sankey)
+
+
+ def xl_read_terbase_sheet(
+     ter_excel_sheet: pd.DataFrame,
+     options: dict,
+     mfa_dict: dict,
+     sankey: Sankey
+ ):
+     """
+     Read TER sheet.
+
+     Parameters
+     ----------
+     :param ter_excel_sheet: Excel sheet to read (dataframe)
+     :type ter_excel_sheet: pd.DataFrame
+
+     :param options: Dictionary of parsing options.
+     :type options: dict
+
+     :param mfa_dict: Data struct for Sankey
+     :type mfa_dict: dict, modified
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success ; Error message)
+     :rtype: (bool; string)
+     """
+     # Extract all tables from sheet
+     tables = []
+     _extractTablesFromSheet(ter_excel_sheet, tables)
+     if len(tables) != 2:
+         err_msg = 'Could not find or extract the necessary two tables, found {}.\n'.format(len(tables))
+         err_msg += 'Are all the tables here and correctly formatted ?'
+         return False, err_msg
+     # Do we have duplicated cols or rows?
+     for table in tables:
+         has_dup_cols, dup_cols = _hasDuplicatedEntry(table.columns.to_list())
+         if has_dup_cols:
+             _fuseDuplicatedColumns(table, dup_cols)
+         has_dup_rows, dup_rows = _hasDuplicatedEntry(table.index.to_list())
+         if has_dup_rows:
+             _fuseDuplicatedRows(table, dup_rows)
+     # Do we have the same columns and rows for each table?
+     has_missing_entry = False
+     msg = ""
+     sets_headers = [(set(table.index.to_list()), set(table.columns.to_list())) for table in tables]
+     for i in range(len(sets_headers) - 1):
+         diff_rows = sets_headers[i][0] - sets_headers[i+1][0]
+         if len(diff_rows) > 0:
+             has_missing_entry = True
+             msg += 'Tables {0} and {1} have incompatible rows : {2}\n'.format(
+                 i, i+1, list(diff_rows))
+         diff_cols = sets_headers[i][1] - sets_headers[i+1][1]
+         if len(diff_cols) > 0:
+             has_missing_entry = True
+             msg += 'Tables {0} and {1} have incompatible columns : {2}\n'.format(
+                 i, i+1, list(diff_cols))
+     if has_missing_entry:
+         return False, msg
+     # Separate tables
+     table_supplies = tables[0]  # Defines flux Sectors->Products, with Cols=Sectors, Rows=Products
+     table_uses = tables[1]  # Defines flux Products->Sectors, with Cols=Sectors, Rows=Products
+     # In Sankey struct
+     log = ''
+     ok, msg = sankey.update_from_matrix_table(
+         table_supplies.T.replace({np.nan: None}),
+         warn_on_new_nodes=options['warn_on_new_nodes'],
+         warn_on_new_flux=options['warn_on_new_flux'],
+         tagg_name='Type de noeud',
+         tagg_type=CONST.TAG_TYPE_NODE,
+         tag_name_col=CONST.NODE_TYPE_PRODUCT,
+         tag_name_row=CONST.NODE_TYPE_SECTOR)
+     if not ok:
+         err = 'Could not process supplies table : {}'.format(msg)
+         return ok, err
+     log += msg
+     ok, msg = sankey.update_from_matrix_table(
+         table_uses.replace({np.nan: None}),
+         warn_on_new_nodes=options['warn_on_new_nodes'],
+         warn_on_new_flux=options['warn_on_new_flux'],
+         tagg_name='Type de noeud',
+         tagg_type=CONST.TAG_TYPE_NODE,
+         tag_name_col=CONST.NODE_TYPE_SECTOR,
+         tag_name_row=CONST.NODE_TYPE_PRODUCT)
+     log += msg
+     if not ok:
+         err = 'Could not process uses table : {}'.format(msg)
+         return ok, err
+     # Set MFA dict - Needed for retrocompatibility
+     # Set 'x' and 'X' as 1
+     table_uses.replace({'x': 1}, inplace=True)
+     table_uses.replace({'X': 1}, inplace=True)
+     table_supplies.replace({'x': 1}, inplace=True)
+     table_supplies.replace({'X': 1}, inplace=True)
+     # Default type = int
+     _castColumnType(table_uses, 0, empty_to_default_value=True)
+     _castColumnType(table_supplies, 0, empty_to_default_value=True)
+     # Save in MFA dict
+     mfa_dict[CONST.TER_SHEET] = {}
+     mfa_dict[CONST.TER_SHEET]['use'] = table_uses
+     mfa_dict[CONST.TER_SHEET]['supply'] = table_supplies
+     return True, log
+
+
+ def xl_read_input_output_sheet(
+     io_excel_sheet: pd.DataFrame,
+     options: dict,
+     mfa_input: dict,
+     sankey: Sankey,
+     read_data_in_matrix=False
+ ):
+     """
+     Read IO sheet.
+
+     Parameters
+     ----------
+     :param io_excel_sheet: Excel sheet to read (dataframe)
+     :type io_excel_sheet: pd.DataFrame
+
+     :param options: Dictionary of parsing options.
+     :type options: dict
+
+     :param mfa_input: Data struct for Sankey
+     :type mfa_input: dict, modified
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success ; Error message)
+     :rtype: (bool; string)
+     """
+     # Extract all tables from sheet
+     tables = []
+     _extractTablesFromSheet(io_excel_sheet, tables)
+     if len(tables) != 1:
+         err_msg = 'Did not find the correct amount of tables. Need one table, found {}.'.format(len(tables))
+         if len(tables) == 0:
+             err_msg += '\nIs the table in the given sheet and correctly formatted ?'
+         return False, err_msg
+     io_sheet = tables[0]
+     # Do we have duplicated cols or rows?
+     has_dup_cols, dup_cols = _hasDuplicatedEntry(io_sheet.columns.to_list())
+     if has_dup_cols:
+         _fuseDuplicatedColumns(io_sheet, dup_cols)
+     has_dup_rows, dup_rows = _hasDuplicatedEntry(io_sheet.index.to_list())
+     if has_dup_rows:
+         _fuseDuplicatedRows(io_sheet, dup_rows)
+     # In Sankey struct
+     ok, msg = sankey.update_from_matrix_table(
+         io_sheet.replace({np.nan: None}),
+         data_in_matrix=read_data_in_matrix,
+         warn_on_new_nodes=options['warn_on_new_nodes'],
+         warn_on_new_flux=options['warn_on_new_flux'])
+     # Update MFA data dict - Needed for retrocompatibility
+     # Set 'x' and 'X' as 1
+     io_sheet.replace({'x': 1}, inplace=True)
+     io_sheet.replace({'X': 1}, inplace=True)
+     # Default type = int
+     _castColumnType(io_sheet, 0, empty_to_default_value=False)
+     # Save in MFA dict
+     mfa_input[CONST.IO_SHEET] = io_sheet
+     # Output
+     return ok, msg
+
+
+ def xl_read_input_output_data_sheet(
+     io_excel_sheet: pd.DataFrame,
+     options: dict,
+     mfa_input: dict,
+     sankey: Sankey
+ ):
+     """
+     Read IO data sheet (an IO sheet with data values inside the matrix).
+
+     Parameters
+     ----------
+     :param io_excel_sheet: Excel sheet to read (dataframe)
+     :type io_excel_sheet: pd.DataFrame
+
+     :param options: Dictionary of parsing options.
+     :type options: dict
+
+     :param mfa_input: Data struct for Sankey
+     :type mfa_input: dict, modified
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success ; Error message)
+     :rtype: (bool; string)
+     """
+     return xl_read_input_output_sheet(
+         io_excel_sheet,
+         options,
+         mfa_input,
+         sankey,
+         read_data_in_matrix=True)
+
+
+ def xl_read_min_max_sheet(
+     min_max_sheet: pd.DataFrame,
+     options: dict,
+     sankey: Sankey
+ ):
+     """
+     Read CONST.MIN_MAX_SHEET.
+
+     Parameters
+     ----------
+     :param min_max_sheet: Excel sheet to read
+     :type min_max_sheet: pd.DataFrame
+
+     :param options: Dictionary of parsing options.
+     :type options: dict
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success ; Error message)
+     :rtype: (bool; string)
+     """
+     # Set column headers consistent with tag groups
+     tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
+     tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
+     new_columns_names = list(
+         map(lambda x: consistantColName(CONST.MIN_MAX_SHEET, x, sankey, tags),
+             min_max_sheet.columns))
+     # Waiting for these columns
+     # Obligatory columns to have in min/max sheet, with their default type
+     oblig_columns = {
+         CONST.MIN_MAX_ORIGIN: '',
+         CONST.MIN_MAX_DESTINATION: ''}
+     # All columns are here?
+     ok, msg = _checkNeededColumns(new_columns_names, list(oblig_columns.keys()), CONST.MIN_MAX_SHEET)
+     if not ok:
+         return ok, msg
+     # Ok to update column names with consistent names
+     min_max_sheet.columns = new_columns_names
+     # Facultative columns we can have, with default value
+     facul_columns = {}
+     for tag in tags:
+         facul_columns[tag] = ''
+     # Convert to int, str, or None if NaN
+     ok, msg = _castColumnType(
+         min_max_sheet, dict(oblig_columns, **facul_columns))
+     if not ok:
+         return ok, msg
+     # Update sankey struct
+     ok, msg = sankey.update_from_min_max_table(
+         min_max_sheet,
+         options['warn_on_new_nodes'],
+         options['warn_on_new_flux'])
+     if not ok:
+         return ok, msg
+     return True, ''
+
+
+ def xl_read_constraints_sheet(
+     constraints_sheet: pd.DataFrame,
+     options: dict,
+     sankey: Sankey
+ ):
+     """
+     Read CONST.CONSTRAINTS_SHEET.
+
+     Parameters
+     ----------
+     :param constraints_sheet: Excel sheet to read
+     :type constraints_sheet: pd.DataFrame
+
+     :param options: Dictionary of parsing options
+     :type options: dict
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success; Error message)
+     :rtype: (bool; string)
+     """
+     # Set column headers consistent with tag groups
+     tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
+     tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
+     new_columns_names = list(
+         map(lambda x: consistantColName(CONST.CONSTRAINTS_SHEET, x, sankey, tags),
+             constraints_sheet.columns))
+     # Mandatory columns for this sheet, with their default values
+     oblig_columns = {
+         CONST.CONSTRAINT_ID: '',
+         CONST.CONSTRAINT_ORIGIN: '',
+         CONST.CONSTRAINT_DESTINATION: ''}
+     # Alternative value columns (one of these is expected), with their default values
+     onlyone_columns = {
+         CONST.CONSTRAINT_EQ: 0.0,
+         CONST.CONSTRAINT_INEQ_INF: 0.0,
+         CONST.CONSTRAINT_INEQ_SUP: 0.0}
+     # Are all needed columns present?
+     ok, msg = _checkNeededColumns(
+         new_columns_names,
+         list(oblig_columns.keys()),
+         CONST.CONSTRAINTS_SHEET,
+         list(onlyone_columns.keys()))
+     if not ok:
+         return ok, msg
+     # OK to update column names with consistent names
+     constraints_sheet.columns = new_columns_names
+     # Facultative columns we may have, with their default values
+     facul_columns = {}
+     for tag in tags:
+         facul_columns[tag] = ''
+     # Cast each column to its default type, or None if NaN
+     ok, msg = _castColumnType(
+         constraints_sheet, dict(oblig_columns, **onlyone_columns, **facul_columns))
+     if not ok:
+         return ok, msg
+     # Update sankey struct
+     ok, msg = sankey.update_from_constraints_table(
+         constraints_sheet,
+         options['warn_on_new_nodes'],
+         options['warn_on_new_flux'])
+     if not ok:
+         return ok, msg
+     return True, ''
+
+
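+ # --- Illustrative sketch (hypothetical, not the module's _checkNeededColumns):
+ # the constraints sheet needs all mandatory columns plus at least one of the
+ # alternative value columns (eq / ineq_inf / ineq_sup); a check of this shape
+ # captures both requirements.
+ def _toy_check_needed_columns(columns, mandatory, one_of=None):
+     missing = [c for c in mandatory if c not in columns]
+     if missing:
+         return False, 'Missing mandatory columns: {}'.format(missing)
+     if one_of and not any(c in columns for c in one_of):
+         return False, 'Need at least one column among: {}'.format(one_of)
+     return True, ''
+
+
+ # _toy_check_needed_columns(['id', 'origin', 'destination', 'eq'],
+ #                           ['id', 'origin', 'destination'],
+ #                           ['eq', 'ineq_inf', 'ineq_sup']) -> (True, '')
+
+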
+ def xl_read_result_sheet(
+     result_sheet: pd.DataFrame,
+     sankey: Sankey
+ ):
+     """
+     Read result sheet.
+
+     Parameters
+     ----------
+     :param result_sheet: Excel sheet to read
+     :type result_sheet: pd.DataFrame
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success; Error message)
+     :rtype: (bool; string)
+     """
+     # Set column headers consistent with the column names specified for the results sheet
+     tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
+     tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
+     new_columns_names = list(
+         map(lambda x: consistantColName(CONST.RESULTS_SHEET, x, sankey, tags),
+             result_sheet.columns))
+     # Mandatory columns for this sheet (origin, destination, value), with their default values
+     oblig_columns = {
+         CONST.RESULTS_ORIGIN: '',
+         CONST.RESULTS_DESTINATION: '',
+         CONST.RESULTS_VALUE: 0.0}
+     # Are all mandatory columns present?
+     ok, msg = _checkNeededColumns(
+         new_columns_names,
+         list(oblig_columns.keys()),
+         CONST.RESULTS_SHEET)
+     if not ok:
+         return ok, msg
+     # OK to update column names with consistent names
+     result_sheet.columns = new_columns_names
+     # Facultative columns we may have, with their default values
+     facul_columns = {
+         CONST.RESULTS_FREE_MIN: 0.0,
+         CONST.RESULTS_FREE_MAX: 0.0}
+     # Cast each column to its default type, or None if NaN
+     ok, msg = _castColumnType(
+         result_sheet, dict(oblig_columns, **facul_columns))
+     if not ok:
+         return ok, msg
+     # Update Sankey
+     return sankey.update_from_result_table(result_sheet)
+
+
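+ # --- Illustrative sketch (hypothetical): the kind of coercion a
+ # _castColumnType-style pass performs — each column is cast to the type of
+ # its default value, and empty cells (NaN) become None.
+ def _toy_cast_column_types(frame, defaults):
+     for col, default in defaults.items():
+         if col in frame.columns:
+             frame[col] = frame[col].map(
+                 lambda v, _t=type(default): None if pd.isna(v) else _t(v))
+     return True, ''
+
+
+ # frame = pd.DataFrame({'value': ['1.5', np.nan]})
+ # _toy_cast_column_types(frame, {'value': 0.0})
+ # frame['value'] -> [1.5, None]
+
+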
+ def xl_read_analysis_sheet(
+     analysis_sheet: pd.DataFrame,
+     mfa_dict: dict,
+     sankey: Sankey
+ ):
+     """
+     Read CONST.ANALYSIS_SHEET.
+
+     Parameters
+     ----------
+     :param analysis_sheet: Excel sheet to read
+     :type analysis_sheet: pd.DataFrame
+
+     :param mfa_dict: MFA data after parsing
+     :type mfa_dict: dict, modified
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success; Error message)
+     :rtype: (bool; string)
+     """
+     # Set column headers consistent with tag groups
+     tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
+     tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
+     new_columns_names = list(
+         map(lambda x: consistantColName(CONST.ANALYSIS_SHEET, x, sankey, tags),
+             analysis_sheet.columns))
+     # Mandatory columns for this sheet, with their default values
+     oblig_columns = {
+         CONST.RESULTS_ORIGIN: '',
+         CONST.RESULTS_DESTINATION: '',
+         CONST.RESULTS_VALUE: 0.0}
+     # Are all mandatory columns present?
+     ok, msg = _checkNeededColumns(
+         new_columns_names,
+         list(oblig_columns.keys()),
+         CONST.ANALYSIS_SHEET)
+     if not ok:
+         return ok, msg
+     # OK to update column names with consistent names
+     analysis_sheet.columns = new_columns_names
+     # Facultative columns we may have, with their default values
+     facul_columns = {}
+     for tag in tags:
+         facul_columns[tag] = ''
+     # Cast each column to its default type, or None if NaN
+     ok, msg = _castColumnType(
+         analysis_sheet, dict(oblig_columns, **facul_columns))
+     if not ok:
+         return ok, msg
+     # Update Sankey - analysis part
+     ok, msg = sankey.update_from_analysis_table(
+         analysis_sheet)
+     if not ok:
+         return ok, msg
+     # Update MFA data dict
+     mfa_dict[CONST.ANALYSIS_SHEET] = analysis_sheet
+     return True, ''
+
+
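+ # --- Illustrative sketch (hypothetical tag names): dict(a, **b), used
+ # throughout this module to merge mandatory and facultative column defaults,
+ # builds a new dict in which keys from the second argument win on collision.
+ def _demo_merge_column_defaults():
+     oblig = {CONST.RESULTS_ORIGIN: '', CONST.RESULTS_DESTINATION: '', CONST.RESULTS_VALUE: 0.0}
+     facul = {'tag_a': '', 'tag_b': ''}
+     return dict(oblig, **facul)  # five keys, facultative defaults appended
+
+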
+ def xl_read_uncertainty_sheet(
+     uncertainty_sheet: pd.DataFrame,
+     mfa_dict: dict,
+     sankey: Sankey
+ ):
+     """
+     Read CONST.UNCERTAINTY_SHEET.
+
+     Parameters
+     ----------
+     :param uncertainty_sheet: Excel sheet to read
+     :type uncertainty_sheet: pd.DataFrame
+
+     :param mfa_dict: MFA data after parsing
+     :type mfa_dict: dict, modified
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success; Error message)
+     :rtype: (bool; string)
+     """
+     # Filter out empty columns (read_excel names them 'Unnamed: <n>')
+     for i, col in enumerate(uncertainty_sheet.columns):
+         if 'Unnamed' in col:
+             uncertainty_sheet.drop(uncertainty_sheet.columns[i:], inplace=True, axis=1)
+             break
+     # Set column headers consistent with tag groups
+     tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
+     tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
+     new_columns_names = list(
+         map(lambda x: consistantColName(CONST.UNCERTAINTY_SHEET, x, sankey, tags),
+             uncertainty_sheet.columns))
+     # Mandatory columns for this sheet, with their default values
+     oblig_columns = {
+         CONST.UNCERTAINTY_ORIGIN: '',
+         CONST.UNCERTAINTY_DESTINATION: ''}
+     # Are all mandatory columns present?
+     ok, msg = _checkNeededColumns(
+         new_columns_names,
+         list(oblig_columns.keys()),
+         CONST.UNCERTAINTY_SHEET)
+     if not ok:
+         return ok, msg
+     # OK to update column names with consistent names
+     uncertainty_sheet.columns = new_columns_names
+     # Facultative columns we may have, with default value and default position in sheet
+     facul_columns = {}
+     facul_column_pos = 2
+     for col_name in CONST.UNCERTAINTY_SHEET_COLS:
+         facul_columns[str(col_name)] = {'val': 0.0, 'pos': facul_column_pos}
+         facul_column_pos += 1
+     for tag in tags:
+         facul_columns[tag] = {'val': '', 'pos': facul_column_pos}
+         facul_column_pos += 1
+     # Add any missing facultative column at its default position
+     for facul_column_name, facul_column in facul_columns.items():
+         if facul_column_name not in uncertainty_sheet.columns:
+             uncertainty_sheet.insert(
+                 facul_column['pos'], facul_column_name, facul_column['val'])
+     # Cast columns to their default types, or to default / None if empty
+     ok, msg = _castColumnType(
+         uncertainty_sheet,
+         dict(oblig_columns, **facul_columns),
+         empty_to_default_value=True)
+     if not ok:
+         return ok, msg
+     # Update Sankey - uncertainty part
+     ok, msg = sankey.update_from_uncertainty_table(
+         uncertainty_sheet)
+     if not ok:
+         return ok, msg
+     # Update MFA data dict
+     mfa_dict[CONST.UNCERTAINTY_SHEET] = uncertainty_sheet
+     return True, ''
+
+
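+ # --- Illustrative sketch (hypothetical data): the two pandas idioms used
+ # above — truncating at the first 'Unnamed: <n>' header that read_excel
+ # assigns to blank columns, then inserting a missing facultative column at a
+ # fixed position with a default value.
+ def _demo_uncertainty_sheet_cleanup():
+     frame = pd.DataFrame(
+         [['a', 'b', None]],
+         columns=['origin', 'destination', 'Unnamed: 2'])
+     for i, col in enumerate(frame.columns):
+         if 'Unnamed' in col:
+             frame.drop(frame.columns[i:], inplace=True, axis=1)
+             break
+     frame.insert(2, 'uncertainty', 0.0)  # defaulted column at position 2
+     return list(frame.columns)  # ['origin', 'destination', 'uncertainty']
+
+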
+ def xl_read_conversions_sheet(
+     conversions_sheet: pd.DataFrame,
+     mfa_dict: dict,
+     sankey: Sankey
+ ):
+     """
+     Read CONST.CONVERSIONS_SHEET.
+     TODO this sheet must be changed.
+
+     Parameters
+     ----------
+     :param conversions_sheet: Excel sheet to read
+     :type conversions_sheet: pd.DataFrame
+
+     :param mfa_dict: MFA data after parsing
+     :type mfa_dict: dict, modified
+
+     :param sankey: Sankey struct constructed from input
+     :type sankey: Sankey, modified
+
+     Returns
+     -------
+     :return: (Success; Error message)
+     :rtype: (bool; string)
+     """
+     # Set column headers consistent with tag groups
+     new_columns_names = list(
+         map(lambda x: consistantColName(CONST.CONVERSIONS_SHEET, x, sankey),
+             conversions_sheet.columns))
+     # Mandatory columns for this sheet, with their default values
+     oblig_columns = {
+         CONST.CONVERSIONS_LOCATION: '',
+         CONST.CONVERSIONS_PRODUCT: '',
+         CONST.CONVERSIONS_NATURAL_UNIT: '',
+         CONST.CONVERSIONS_FACTOR: 0.0}
+     # Are all mandatory columns present?
+     ok, msg = _checkNeededColumns(new_columns_names, list(oblig_columns.keys()), CONST.CONVERSIONS_SHEET)
+     if not ok:
+         return ok, msg
+     # OK to update column names with consistent names
+     conversions_sheet.columns = new_columns_names
+     # # Facultative columns we may have, with their default values
+     # facul_columns = {
+     #     CONST.CONVERSIONS_FACTOR_INV: 0.0}
+     # # Cast each column to its default type, or None if NaN
+     # ok, msg = _castColumnType(
+     #     conversions_sheet.iloc[1:], dict(oblig_columns, **facul_columns))
+     # if not ok:
+     #     return ok, msg
+     conversions_sheet.replace({np.nan: None}, inplace=True)
+     # Update Sankey - conversions part
+     nodes = []
+     ok, msg = sankey.update_from_conversions_table(conversions_sheet, nodes)
+     if not ok:
+         return ok, msg
+     # Update MFA data dict
+     nodes2tooltips = {}
+     nodes2units_conv = {}
+     nodes2natural_unit = {}
+     for node in nodes:
+         for localisation in node.unit.keys():
+             name = localisation + '/' + node.name
+             node2tooltips = []
+             node2units_conv = [1.0]
+             for tooltip in sankey.tooltips.keys():
+                 if tooltip in node.tooltips.keys():
+                     node2tooltips.append(node.tooltips[tooltip].content)
+                 else:
+                     node2tooltips.append(None)
+             for unit in sankey.units.keys():
+                 other_factors = node.get_other_factors(localisation)
+                 try:
+                     node2units_conv.append(other_factors[unit])
+                 except Exception:
+                     node2units_conv.append(None)
+             nodes2tooltips[name] = node2tooltips
+             nodes2units_conv[name] = node2units_conv
+             nodes2natural_unit[name] = node.get_natural_unit(localisation)
+     mfa_dict[CONST.CONVERSIONS_SHEET] = {
+         'tooltip_names': [[name, desc] for name, desc in sankey.tooltips.items()],
+         'units_names': [[name, desc] for name, desc in sankey.units.items()],
+         'nodes2tooltips': nodes2tooltips,
+         'nodes2units_conv': nodes2units_conv,
+         'nodes2natural_unit': nodes2natural_unit}
+     return True, ''
+
+
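+ # --- Illustrative sketch (hypothetical data): replace({np.nan: None}), used
+ # above, turns missing conversion factors into JSON-friendly None values
+ # (the column switches to object dtype) instead of leaving float NaN.
+ def _demo_nan_to_none():
+     frame = pd.DataFrame({'product': ['steel'], 'factor': [np.nan]})
+     frame = frame.replace({np.nan: None})
+     return frame.loc[0, 'factor']  # None
+
+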
+ def write_excel_from_sankey(
+     excel_filename: str,
+     sankey: Sankey,
+     mode: str = 'a',
+     sheets_to_remove__names: list = [],
+     **kwargs
+ ):
+     """
+     Write a Sankey structure to an Excel file.
+
+     Parameters
+     ----------
+     :param excel_filename: Name of Excel file to write
+     :type excel_filename: str
+
+     :param sankey: Sankey structure to write to Excel file
+     :type sankey: Sankey
+
+     Optional parameters
+     -------------------
+     :param mode: Writing mode (see pandas.ExcelWriter for more infos)
+     :type mode: str, optional (defaults to 'a')
+
+     :param sheets_to_remove__names: List of sheets (by name) to remove from the Excel file if they are present
+     :type sheets_to_remove__names: list[str, ...], optional (defaults to [])
+
+     Hidden parameters
+     -----------------
+     :param additional_sheets: Dict of tables (pandas.DataFrame) to add in Excel file
+     :type additional_sheets: Dict{str: pandas.DataFrame}
+     """
+     # Post-process function
+     def _post_process_excel_file(
+         excel_file
+     ):
+         # Extract excel book
+         excel = excel_file.book
+         sheets = excel._sheets
+         # Remove unwanted sheets
+         for sheet_to_remove__name in sheets_to_remove__names:
+             try:
+                 sheet_to_remove__id = sheets.index(excel[sheet_to_remove__name])
+                 sheets.pop(sheet_to_remove__id)
+             except Exception:
+                 pass
+         # Read-me sheet must always be the first sheet
+         try:
+             read_me_sheet__id = sheets.index(excel['READ ME'])
+             sheet = sheets.pop(read_me_sheet__id)
+             sheets.insert(0, sheet)
+         except Exception:
+             pass
+         # File is open and saved by xlwings to activate the formulas.
+         # if has_xl_wings:
+         #     try:
+         #         app = xl.App(visible=False)
+         #         book = app.books.open(excel_filename)
+         #         book.save()
+         #         app.kill()
+         #     except Exception:
+         #         pass
+     # Write sheets from sankey
+     if mode == 'a':
+         with pd.ExcelWriter(excel_filename, engine='openpyxl', mode=mode, if_sheet_exists='replace') as excel_file:
+             sankey.write_in_excel_file(excel_file, **kwargs)
+             _post_process_excel_file(excel_file)
+     else:
+         with pd.ExcelWriter(excel_filename, engine='openpyxl', mode=mode) as excel_file:
+             sankey.write_in_excel_file(excel_file, **kwargs)
+             _post_process_excel_file(excel_file)
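+
+
+ # --- Illustrative sketch (hypothetical file and sheet names): how the two
+ # writing modes above behave — mode='w' creates the workbook from scratch,
+ # while mode='a' with if_sheet_exists='replace' rewrites only the matching
+ # sheets of an existing workbook, as write_excel_from_sankey does.
+ def _demo_excel_writer_modes(filename='demo.xlsx'):
+     frame = pd.DataFrame({'origin': ['a'], 'destination': ['b'], 'value': [1.0]})
+     with pd.ExcelWriter(filename, engine='openpyxl', mode='w') as writer:
+         frame.to_excel(writer, sheet_name='flux', index=False)
+     with pd.ExcelWriter(filename, engine='openpyxl', mode='a',
+                         if_sheet_exists='replace') as writer:
+         frame.assign(value=[2.0]).to_excel(writer, sheet_name='flux', index=False)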