SankeyExcelParser 1.0.0b0__py3-none-any.whl
- SankeyExcelParser/__init__.py +0 -0
- SankeyExcelParser/io_excel.py +1867 -0
- SankeyExcelParser/io_excel_constants.py +811 -0
- SankeyExcelParser/sankey.py +3138 -0
- SankeyExcelParser/sankey_utils/__init__.py +0 -0
- SankeyExcelParser/sankey_utils/data.py +1118 -0
- SankeyExcelParser/sankey_utils/excel_source.py +31 -0
- SankeyExcelParser/sankey_utils/flux.py +344 -0
- SankeyExcelParser/sankey_utils/functions.py +278 -0
- SankeyExcelParser/sankey_utils/node.py +340 -0
- SankeyExcelParser/sankey_utils/protos/__init__.py +0 -0
- SankeyExcelParser/sankey_utils/protos/flux.py +84 -0
- SankeyExcelParser/sankey_utils/protos/node.py +386 -0
- SankeyExcelParser/sankey_utils/protos/sankey_object.py +135 -0
- SankeyExcelParser/sankey_utils/protos/tag_group.py +95 -0
- SankeyExcelParser/sankey_utils/sankey_object.py +165 -0
- SankeyExcelParser/sankey_utils/table_object.py +37 -0
- SankeyExcelParser/sankey_utils/tag.py +95 -0
- SankeyExcelParser/sankey_utils/tag_group.py +206 -0
- SankeyExcelParser/su_trace.py +239 -0
- SankeyExcelParser/tests/integration/__init__.py +0 -0
- SankeyExcelParser/tests/integration/test_base.py +356 -0
- SankeyExcelParser/tests/integration/test_run_check_input.py +100 -0
- SankeyExcelParser/tests/integration/test_run_conversions.py +96 -0
- SankeyExcelParser/tests/integration/test_run_load_input.py +94 -0
- SankeyExcelParser/tests/unit/__init__.py +0 -0
- SankeyExcelParser-1.0.0b0.data/scripts/run_parse_and_write_excel.py +155 -0
- SankeyExcelParser-1.0.0b0.data/scripts/run_parse_excel.py +115 -0
- SankeyExcelParser-1.0.0b0.dist-info/METADATA +113 -0
- SankeyExcelParser-1.0.0b0.dist-info/RECORD +32 -0
- SankeyExcelParser-1.0.0b0.dist-info/WHEEL +5 -0
- SankeyExcelParser-1.0.0b0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1867 @@
"""
This module is dedicated to the conversion from external formats to the internal json format.
External formats may be: a workbook (excel), another json file, a database, etc.
Structure and specifications of the internal json format are defined in this module. The internal
json format can take two main forms: one to address input information and a second one
for output communications.
"""

# External libs -----------------------------------------------------
import pandas as pd
import numpy as np
import re

# Local libs -------------------------------------------------------
import SankeyExcelParser.io_excel_constants as CONST
import SankeyExcelParser.su_trace as su_trace

# External modules -------------------------------------------------
from unidecode import unidecode

# Local modules -----------------------------------------------------
from SankeyExcelParser.sankey import Sankey, UserExcelConverter

# has_xl_wings = True
# try:
#     # import xlwings as xl
#     import pythoncom
#     pythoncom.CoInitialize()
# except Exception:
#     has_xl_wings = False


# Private functions ----------------------------------------------------------------
def _compareStrings(
    string_in: str,
    string_ref: str,
    strip_input_string=False
):
    """
    Uniformize strings (case, accents) for easier comparison.

    Parameters
    ----------
    :param string_in: String to compare.
    :type string_in: str

    :param string_ref: Reference string (used as a regex pattern) to compare with.
    :type string_ref: str

    :param strip_input_string: Remove spaces at start / end of the input string.
    :type strip_input_string: boolean, optional (default=False)

    Returns
    -------
    :return: True if strings mean the same thing, False otherwise.
    :rtype: bool
    """
    s1 = string_in.lower()
    s2 = string_ref.lower()
    if strip_input_string:
        s1 = s1.strip()
    return (re.fullmatch(unidecode(s2), unidecode(s1)) is not None)
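
# Behaviour sketch: the reference string acts as a case- and accent-insensitive
# regex pattern against the input string, e.g.:
#   _compareStrings('Flux Data', 'flux data')                             # -> True
#   _compareStrings(' flux data ', 'flux data', strip_input_string=True)  # -> True
#   _compareStrings('données', 'donnees')                                 # -> True (accents folded)
#   _compareStrings('flux', 'flux data')                                  # -> False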


def _consistantColName(
    sheet_name: str,
    usr_col_name: str,
    xl_names_converter: UserExcelConverter,
    tags: list = []
):
    '''
    Test if usr_col_name is consistent with the allowed columns list.

    Parameters
    ----------
    :param sheet_name: Sheet name to check.
    :type sheet_name: string

    :param usr_col_name: Column to find.
    :type usr_col_name: string

    :param xl_names_converter: Keeps track of the mapping between user and standard names.
    :type xl_names_converter: UserExcelConverter

    :param tags: Tags list to check.
    :type tags: list

    Returns
    -------
    :return:
        If the column corresponds to an entry in the sheet name dictionary, the result is the corresponding key.
        If the column is a tag column / an additional column, the result is the standard format of the column name.
    :rtype: (bool, string)
    '''
    # Check if sheet is about data
    if _compareStrings(sheet_name, 'flux data', strip_input_string=True):
        xl_names_converter.add_new_col(sheet_name, CONST.DATA_SHEET, usr_col_name)
        return True, CONST.DATA_SHEET
    sheet_name_lower = sheet_name.lower()
    usr_col_name_lower = usr_col_name.lower()
    if sheet_name_lower != '' and usr_col_name_lower != '':
        # Is the proposed column a tag column ?
        for tag in tags:
            if _compareStrings(usr_col_name_lower, tag, strip_input_string=True):
                return True, tag
        # Is the proposed column in allowed columns ?
        for std_col_name in CONST.DICT_OF_COLS_NAMES__RE[sheet_name_lower].keys():
            for allowed_col_re in CONST.DICT_OF_COLS_NAMES__RE[sheet_name_lower][std_col_name]:
                if _compareStrings(usr_col_name_lower, allowed_col_re, strip_input_string=True):
                    xl_names_converter.add_new_col(sheet_name_lower, std_col_name, usr_col_name)
                    return True, std_col_name
    return False, usr_col_name


def _consistantSheetName(
    usr_sheet_name: str,
    xl_names_converter: UserExcelConverter,
):
    '''
    Test if usr_sheet_name is consistent with the allowed sheets list.

    Parameters
    ----------
    :param usr_sheet_name: Sheet name to check.
    :type usr_sheet_name: string

    Returns
    -------
    :return:
        - out1: True if the tested sheet is consistent.
        - out2: The dictionary key corresponding to the allowed list entry found, if the tested
          sheet is consistent. List of allowed sheet names if not.
    :rtype: (bool, string)

    Notes
    -----
    - If the usr_sheet_name input is empty ('') the result is a list of
      allowed sheet names as a string.
    - A particular case is taken into account for proxy input files which
      usually have 3 proxy sheets (one of them with the 'sector' keyword in its name).
    '''
    # Check if sheet is about data
    if _compareStrings(usr_sheet_name, 'flux data', strip_input_string=True):
        xl_names_converter.add_new_sheet(CONST.DATA_SHEET, usr_sheet_name)
        return True, CONST.DATA_SHEET
    # If we have a sheet to check
    if usr_sheet_name != '':
        # Is sheet in list of possible names for sheets
        for std_sheet_name in CONST.DICT_OF_SHEET_NAMES__RE.keys():
            for allow_sheet_re in CONST.DICT_OF_SHEET_NAMES__RE[std_sheet_name]:
                if _compareStrings(usr_sheet_name, allow_sheet_re, strip_input_string=True):
                    xl_names_converter.add_new_sheet(std_sheet_name, usr_sheet_name)
                    return True, std_sheet_name
    # We didn't find the corresponding key
    return False, _allowedSheetNames()
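
# Behaviour sketch: given a Sankey instance `sankey`, a user sheet named
# 'Flux Data' (any case, surrounding spaces allowed) is recognised as the data
# sheet and the match is recorded in the converter; other names are tried
# against the regex lists of CONST.DICT_OF_SHEET_NAMES__RE:
#   ok, key = _consistantSheetName('Flux Data', sankey.xl_user_converter)
#   # ok is True, key == CONST.DATA_SHEET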


def _allowedSheetNames(sheets_to_show=[]):
    '''
    Return the table of allowed sheet names with respect to their type of information.

    Parameters
    ----------
    :param sheets_to_show: List of sheets to print. If the list is empty, print all.
    :type sheets_to_show: list, optional, default=[]

    Returns
    -------
    :return: Formatted table of sheet types and their possible sheet names.
    :rtype: string
    '''
    wcol1 = 30
    wcol2 = 70
    # Create table header
    list_allowed = '{0: <{w1}} | {1: <{w2}}\n'.format("Sheet type", "Possible sheet names", w1=wcol1, w2=wcol2)
    list_allowed += '-'*(wcol1 + wcol2 + 3) + '\n'
    # Keys to show = table first column
    if len(sheets_to_show) > 0:
        list_dict_keys = [_ for _ in sheets_to_show if _ in CONST.DICT_OF_SHEET_NAMES.keys()]
    else:
        list_dict_keys = CONST.DICT_OF_SHEET_NAMES.keys()
    # Create table
    for dict_key in list_dict_keys:
        list_allowed += '{: <{w}} | '.format(dict_key, w=wcol1)
        if len(CONST.DICT_OF_SHEET_NAMES[dict_key]) != 0:
            list_allowed += ', '.join(set(CONST.DICT_OF_SHEET_NAMES[dict_key]))
        list_allowed += '\n'
    return list_allowed


def _checkNeededColumns(
    columns: list,
    columns_needed: list,
    sheet_name: str,
    columns_needed_onlyone: list = []
):
    """
    Check that all needed columns are present in a sheet's columns list.

    Parameters
    ----------
    :param columns: Current list of columns.
    :type columns: list

    :param columns_needed: List of columns to have.
    :type columns_needed: list

    :param sheet_name: Sheet name from which to check names.
    :type sheet_name: str

    :param columns_needed_onlyone: List of columns in which at least one is needed.
    :type columns_needed_onlyone: list

    Returns
    -------
    :return: (Success?, Log message)
    :rtype: (bool, str)
    """
    # Check needed columns
    for column_needed in columns_needed:
        if not (column_needed in columns):
            err_msg = 'The \"{}\" column is missing '.format(column_needed)
            err_msg += 'or does not have the correct name.\n'
            err_msg += '\n'
            err_msg += '{}\n'.format(CONST.DICT_OF_COMMENTS[sheet_name][column_needed][0])
            err_msg += '\n'
            std_column_names = set(CONST.DICT_OF_COLS_NAMES[sheet_name][column_needed])
            err_msg += 'Acceptable names for this column : {}'.format(
                ', '.join(['\"{}\"'.format(_) for _ in std_column_names]))
            return False, err_msg
    # Check optional columns (need only one of them)
    if len(columns_needed_onlyone) > 0:
        if (not any(np.in1d(columns_needed_onlyone, columns))):
            err_msg = 'A mandatory column is missing or does not have the correct name.\n'
            err_msg += 'At least one of these columns must be present : {}'.format(
                ', '.join(['\"{}\"'.format(_) for _ in columns_needed_onlyone]))
            return False, err_msg
    return True, ''
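
# Behaviour sketch: the happy path never touches the CONST dictionaries (they are
# only read to build an error message), so a quick self-contained check is:
#   ok, msg = _checkNeededColumns(['origin', 'destination', 'value'],
#                                 ['origin', 'destination'],
#                                 CONST.DATA_SHEET,
#                                 columns_needed_onlyone=['value', 'quantity'])
#   # ok is True, msg == ''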


def _castColumnType(
    sheet: pd.DataFrame,
    columns_types,
    empty_to_default_value=False
):
    """
    Cast specific columns values to given types or default values.

    Parameters
    ----------
    :param sheet: Sheet to modify.
    :type sheet: pandas.DataFrame, modified

    :param columns_types: Dict of columns and their default types/values OR any default value.
    :type columns_types: any

    :param empty_to_default_value: If True, fill empty cells with the default value, otherwise set them to None.
    :type empty_to_default_value: bool

    Returns
    -------
    :return: (Success? ; Log message)
    :rtype: (bool, str)
    """
    # Filter columns to convert / columns that are in sheet
    if type(columns_types) is dict:
        cols_to_convert = \
            [(_, columns_types[_]) for _ in columns_types.keys() if _ in sheet.columns]
    else:
        cols_to_convert = \
            [(_, columns_types) for _ in sheet.columns]
    # Convert
    for (col, _) in cols_to_convert:
        try:
            # Special type
            if type(_) is dict:
                val = _['val']
            else:
                val = _
            # Convert as string
            if type(val) is str:
                sheet[col] = sheet[col].replace({np.nan: 'None'})
                sheet[col] = sheet[col].astype(str)
                if empty_to_default_value:
                    sheet[col] = sheet[col].replace({'None': val})
                else:
                    sheet[col] = sheet[col].replace({'None': None})
            # Convert as float
            elif type(val) is float:
                sheet[col] = sheet[col].astype(float)
                if empty_to_default_value:
                    sheet[col] = sheet[col].replace({np.nan: val})
                else:
                    sheet[col] = sheet[col].replace({np.nan: None})
            # Convert as int (NaN cells go through an improbable sentinel value)
            elif type(val) is int:
                sheet[col] = sheet[col].replace({np.nan: -702313053})
                sheet[col] = sheet[col].astype(int)
                if empty_to_default_value:
                    sheet[col] = sheet[col].replace({-702313053: val})
                else:
                    sheet[col] = sheet[col].replace({-702313053: None})
            # Convert to other types
            else:
                sheet[col] = sheet[col].astype(type(val))
        except Exception:
            err = 'Column \"{}\" contains values '.format(col)
            err += 'that could not be read as {} values'.format(type(val))
            return False, err
    # Replace remaining empty data with None
    sheet.replace({np.nan: None}, inplace=True)
    return True, ''
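
# Behaviour sketch, assuming a plain pandas DataFrame:
#   df = pd.DataFrame({'Value': [1, None], 'Name': ['a', None]})
#   _castColumnType(df, {'Value': 0.0, 'Name': ''}, empty_to_default_value=True)
#   # -> df['Value'] is float with NaN replaced by 0.0,
#   #    df['Name'] is str with '' for the empty cell
# Passing a scalar instead of a dict applies the same default to every column.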


def _pd_sorted_col(
    dft: pd.DataFrame,
    lico: list
):
    """
    Sort the columns order of a dataframe according to a columns list.

    Parameters
    ----------
    :param dft: Input dataframe to sort.
    :type dft: pandas.DataFrame

    :param lico: Ordered list of columns to have.
    :type lico: list

    Returns
    -------
    :return: Sorted dataframe.
    :rtype: pandas.DataFrame
    """
    li_df = list(dft)
    if li_df != lico:
        dftm = pd.DataFrame(columns=lico)
        for col in lico:
            dftm[col] = dft[col]
        return dftm
    return dft
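
# Behaviour sketch:
#   df = pd.DataFrame({'b': [1], 'a': [2]})
#   _pd_sorted_col(df, ['a', 'b'])  # -> columns reordered as ['a', 'b']
# When the columns already match, the dataframe is returned unchanged.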


def _extractTablesFromSheet(
    sheet: pd.DataFrame,
    new_sheets: list,
    default_columns_names=None
):
    """
    Extract all tables from an excel sheet.

    Ex: Extract tables from a sheet like this

    +----+----+----+----+----+
    | -  | -  | -  | -  | -  |
    +----+----+----+----+----+
    | -  | -  | C1 | C2 | C3 |
    +----+----+----+----+----+
    | -  | R1 | x  | -  | x  |
    +----+----+----+----+----+
    | -  | R2 | x  | x  | -  |
    +----+----+----+----+----+
    | -  | -  | -  | -  | -  |
    +----+----+----+----+----+
    | -  | -  | C4 | C5 | C6 |
    +----+----+----+----+----+
    | -  | R3 | -  | x  | x  |
    +----+----+----+----+----+
    | -  | R4 | x  | -  | -  |
    +----+----+----+----+----+

    Or like this

    +----+----+----+----+----+
    | -  | -  | -  | -  | -  |
    +----+----+----+----+----+
    | -  | -  | C1 | C2 | C3 |
    +----+----+----+----+----+
    | -  | R1 | x  | -  | x  |
    +----+----+----+----+----+
    | -  | R2 | x  | x  | -  |
    +----+----+----+----+----+
    | -  | -  | -  | -  | -  |
    +----+----+----+----+----+
    | -  | R3 | -  | x  | x  |
    +----+----+----+----+----+
    | -  | R4 | x  | -  | -  |
    +----+----+----+----+----+

    Parameters
    ----------
    :param sheet: Sheet to parse.
    :type sheet: pd.DataFrame

    :param new_sheets: List of sheets extracted from sheet.
    :type new_sheets: list(pd.DataFrame), modified

    Returns
    -------
    :return: True if the extraction succeeded, False otherwise.
    :rtype: bool
    """
    # Nothing to do
    if sheet.empty:
        return True
    # If we don't have any default column name -> read column index
    # -> Useful if first row is composed of node names
    # -> Need to get rid of 'Unnamed' cols
    # -> Then, if nodes are mentioned in more than one column, pandas adds a '.x' (x a number)
    #    at the end of the node name, so we need to get rid of that too...
    if default_columns_names is None:
        default_columns_names = []
        for _ in sheet.columns:
            if isinstance(_, str):
                if (re.fullmatch('Unnamed:.*', _) is None):
                    end_ = re.search('([.][0-9]+)\Z', _)  # noqa: W605
                    if end_ is not None:
                        default_columns_names.append(_[:-len(end_[0])])
                    else:
                        default_columns_names.append(_)
    # Need to reindex sheet to correctly use enumerated index and columns
    sheet = sheet.reset_index(drop=True)
    sheet = sheet.T.reset_index(drop=True).T
    # ----------------- Initialize starting and ending points
    start_row = 0
    start_col = 0
    index_col = 0  # Column number for index names
    end_row = sheet.shape[0]
    end_col = sheet.shape[1]
    # ---------------- Find starting point
    found_starting_point = False
    for row in range(sheet.shape[0]):
        for col in range(sheet.shape[1]):
            # Check if current val is NaN (empty cell)
            val = sheet.iat[row, col]
            is_nan = (val != val)
            # If not -> Bingo
            found_starting_point = (not is_nan)
            if found_starting_point:
                start_row = row
                start_col = col
                index_col = col
                break
        if found_starting_point:
            break
    # ------------ Check table format with upper left corner
    upper_left_corner = sheet.iloc[start_row:min(start_row+2, end_row), start_col:min(start_col+2, end_col)]
    # Not enough data in given sheet -> stop ?
    if (upper_left_corner.shape[0] < 2):
        # Modify starting row to avoid missing a table with only one line
        start_row = max(0, start_row-1)
        upper_left_corner = sheet.iloc[start_row:min(start_row+2, end_row), start_col:min(start_col+2, end_col)]
    if (upper_left_corner.shape[1] < 2):
        # Modify starting col to avoid missing a table with only one col
        start_col = max(0, start_col-1)
        index_col = start_col
        upper_left_corner = sheet.iloc[start_row:min(start_row+2, end_row), start_col:min(start_col+2, end_col)]
    if (upper_left_corner.shape[0] < 2) or (upper_left_corner.shape[1] < 2):
        # Ok, table does not contain any data
        return True
    # Upper left corner is an isolated value ?
    v1 = upper_left_corner.iloc[0, 1]
    v2 = upper_left_corner.iloc[1, 0]
    if (v1 != v1) and (v2 != v2):
        # Retry but without the isolated value
        sheet_copy = sheet.copy()  # copy to be sure that we don't modify the original sheet
        sheet_copy.iloc[start_row, start_col] = np.nan
        return _extractTablesFromSheet(sheet_copy, new_sheets, default_columns_names=default_columns_names)
    # First row is an overhead ?
    if (not _isValueAcceptedInMatrixTable(upper_left_corner.iloc[1, 1])):
        # Retry but without the overhead row
        sheet_copy = sheet.copy()  # copy to be sure that we don't modify the original sheet
        sheet_copy.iloc[start_row, start_col:end_col] = np.nan
        return _extractTablesFromSheet(sheet_copy, new_sheets, default_columns_names=default_columns_names)
    # Check if the content of first row = column names
    columns_names = None
    # Check what the upper left corner of the table contains
    # In all cases : 'val' can be 'x', 'X' or some stringified float value.
    # Case 1 : upper left corner = ['R1', 'val' / NaN]
    #          ...                 ['R2', 'val' / NaN]
    #          -> 'val' and NaN can be turned into float.
    # Case 2 : upper left corner = ['C1', 'C2']
    #          ...                 ['val' / NaN, 'val' / NaN]
    #          -> On first row, column names cannot be turned into float
    #          -> On first col, 'val' and NaN can be turned into float
    # Case 3 : upper left corner = ['table name', 'C1' ]
    #          ...                 ['R1'        , 'val' / NaN]
    #          -> On first row, neither table name nor column names can be turned into float
    #          -> On first col, no row name can be turned into float
    if _isValueAcceptedInMatrixTable(upper_left_corner.iloc[0, 1]):
        case = 1
    else:
        if _isValueAcceptedInMatrixTable(upper_left_corner.iloc[1, 0]):
            case = 2
        else:
            case = 3
    # Check in which case we are
    if (case == 1):
        # Case 1 -> need to use default columns names
        columns_names = default_columns_names
        # Start col is one col on the right, because first col is index names
        start_col = min(start_col+1, end_col)
        # Ending col is easy to find
        end_col = min(start_col + len(columns_names), end_col)
    if (case == 2):
        # Case 2 -> There are column names on the first row
        columns_names = sheet.astype('str').iloc[start_row, start_col:].to_list()
        # Start row is one row below & index col is one col before
        start_row = min(start_row+1, end_row)
        index_col = max(0, index_col-1)
    if (case == 3):
        # Case 3 -> There are column names on the first row, but starting one col on the right
        columns_names = sheet.astype('str').iloc[start_row, (start_col+1):].to_list()
        # Start row is one row below & index col does not change, and start col is one col on the right
        start_row = min(start_row+1, end_row)
        start_col = min(start_col+1, end_col)
    if (case == 2) or (case == 3):
        # Case 2 & 3 : Find ending col
        for [i, col_name] in enumerate(columns_names):
            # Check if current col name is NaN (empty cell)
            is_nan = (col_name != col_name)
            # If NaN -> Bingo
            if is_nan:
                end_col = min(start_col + i, end_col)
                columns_names = columns_names[:i]
                break
    # No default column name was provided -> Error
    if columns_names is None:
        return False
    # ------------ Check what first col contains
    index_names = sheet.iloc[start_row:end_row, index_col].to_list()
    # ------------- Find ending row
    for (i, index_name) in enumerate(index_names):
        # Check if current val is NaN (empty cell)
        is_nan = (index_name != index_name)
        # If NaN -> Bingo
        if is_nan:
            end_row = min(i + start_row, end_row)
            index_names = index_names[:i]
            break
    # New table
    new_table = sheet.iloc[start_row:end_row, start_col:end_col]
    if len(new_table.columns) != len(columns_names):
        su_trace.logger.error('Could not read ter table')
        return False
    new_table.columns = [_.strip() if (type(_) is str) else _ for _ in columns_names]
    new_table.index = [_.strip() if (type(_) is str) else _ for _ in index_names]
    new_sheets.append(new_table)
    # Find other tables if needed
    ok = True
    ok &= _extractTablesFromSheet(
        sheet.iloc[:, end_col:], new_sheets,
        default_columns_names=columns_names)  # Upper right missing part of sheet
    ok &= _extractTablesFromSheet(
        sheet.iloc[end_row:, :], new_sheets,
        default_columns_names=columns_names)  # Lower missing part of sheet
    # TODO: review how table remainders are split in the recursion
    return ok
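
# Behaviour sketch, assuming a raw sheet whose header row was not detected by pandas:
#   raw = pd.DataFrame([[np.nan, np.nan, np.nan],
#                       [np.nan, 'C1', 'C2'],
#                       ['R1', 1, np.nan],
#                       ['R2', 'x', 2]])
#   tables = []
#   _extractTablesFromSheet(raw, tables)
#   # tables -> [one DataFrame with columns ['C1', 'C2'] and index ['R1', 'R2']]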


def _isValueAcceptedInMatrixTable(value):
    """
    In a matrix table, accepted values are NaN, numbers, and 'x' or 'X'.

    Parameters
    ----------
    :param value: Value to test.
    :type value: Any

    Returns
    -------
    :return: True if value is OK, else False.
    :rtype: boolean
    """
    # First check if value is a number or NaN
    # by trying to convert it to float
    try:
        float(value)
        return True
    except ValueError:
        # If it fails, then it's not NaN or a number,
        # but it can be either 'x' or 'X'
        OK_but_not_a_number = '[xX]'
        try:
            if (re.fullmatch(OK_but_not_a_number, str(value)) is not None):
                return True
        except ValueError:
            pass
    return False
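
# Behaviour sketch:
#   _isValueAcceptedInMatrixTable('3.5')          # True (parses as float)
#   _isValueAcceptedInMatrixTable(float('nan'))   # True (NaN is a float)
#   _isValueAcceptedInMatrixTable('x')            # True (matches [xX])
#   _isValueAcceptedInMatrixTable('abc')          # False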


def _hasDuplicatedEntry(entries: list):
    """
    Check if a list contains duplicated entries.

    :return: (True if duplicates were found, dict of duplicated entries and their positions)
    :rtype: (bool, dict)
    """
    duplicates = {}
    for (i, entry) in enumerate(entries):
        if entries.count(entry) > 1:
            if entry not in duplicates.keys():
                duplicates[entry] = []
            duplicates[entry].append(i)
    # duplicates = [entry for entry in entries if entries.count(entry) > 1]
    return (len(duplicates) > 0), duplicates
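
# Behaviour sketch:
#   _hasDuplicatedEntry(['a', 'b', 'a'])  # -> (True, {'a': [0, 2]})
#   _hasDuplicatedEntry(['a', 'b'])       # -> (False, {})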


def _fuseDuplicatedColumns(table: pd.DataFrame, dup_cols: dict):
    # Get current columns names
    new_columns_names = table.columns.to_list()
    new_tables = {}
    # For each duplicated column, get the column name and the positions of its duplicates
    for (col_name, cols_index) in dup_cols.items():
        # Fuse columns
        new_tables[col_name] = table.loc[:, col_name].apply(lambda row: row.values[0], axis=1)
        # Rename duplicated columns, except the first one
        for (i, col_index) in enumerate(cols_index):
            if i == 0:
                continue
            new_columns_names[col_index] = col_name+'_dup'
    # Set new columns names
    table.columns = new_columns_names
    # Drop and replace
    for (col_name, sub_table) in new_tables.items():
        # Drop the renamed columns (except the first one)
        table.drop(columns=(col_name+'_dup'), inplace=True)
        # Apply the fused data on the remaining column
        table[col_name] = sub_table


def _fuseDuplicatedRows(table: pd.DataFrame, dup_rows: dict):
    # Get current index names
    new_index_names = table.index.to_list()
    new_tables = {}
    # For each duplicated row, get the row name and the positions of its duplicates
    for (row_name, rows_index) in dup_rows.items():
        # Fuse rows
        new_tables[row_name] = table.loc[row_name, :].apply(lambda col: col.values[0], axis=0)
        # Rename duplicated rows, except the first one
        for (i, row_index) in enumerate(rows_index):
            if i == 0:
                continue
            new_index_names[row_index] = row_name+'_dup'
    # Set new index names
    table.index = new_index_names
    # Drop and replace
    for (row_name, sub_table) in new_tables.items():
        # Drop the renamed rows (except the first one)
        table.drop(index=(row_name+'_dup'), inplace=True)
        # Apply the fused data on the remaining row
        table.loc[row_name, :] = sub_table
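
# Behaviour sketch for the fuse helpers:
#   df = pd.DataFrame([[1, None, 3]], columns=['a', 'a', 'b'])
#   _fuseDuplicatedColumns(df, {'a': [0, 1]})
#   # df now has unique columns ['a', 'b'], keeping, for each row, the value
#   # from the first of the duplicated columns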


# PUBLIC FUNCTIONS ----------------------------------------------------------------
def consistantSheetName(
    usr_sheet_name: str,
    sankey: Sankey
):
    '''
    Test if usr_sheet_name is consistent with the allowed sheets list.

    Parameters
    ----------
    usr_sheet_name : string
        Sheet name to check.

    Returns
    -------
    string
        Result is an empty string if the tested sheet is not consistent.
        Result is the dictionary key corresponding to the allowed list entry found.

    Notes
    -----
    - If the usr_sheet_name input is empty ('') the result is a list of
      allowed sheet names as a string.
    - A particular case is taken into account for proxy input files which
      usually have 3 proxy sheets (one of them with the 'sector' keyword in its name).
    '''
    _, res = _consistantSheetName(usr_sheet_name, sankey.xl_user_converter)
    return res


def consistantColName(
    sheet_name: str,
    prop_col: str,
    sankey: Sankey,
    tags: list = []
):
    '''
    Test if prop_col is consistent with the allowed columns list.

    Parameters
    ----------
    :param sheet_name: Sheet name to check.
    :type sheet_name: string

    :param prop_col: Column to find.
    :type prop_col: string

    :param tags: Tags list to check.
    :type tags: list

    Returns
    -------
    :return:
        If the column corresponds to an entry in the sheet name dictionary, the result is the corresponding key.
        If the column is a tag column / an additional column, the result is the standard format of the column name.
    :rtype: string
    '''
    _, res = _consistantColName(
        sheet_name,
        prop_col,
        sankey.xl_user_converter,
        tags)
    return res


def load_sankey_from_excel_file(
    input_file: str,
    sankey: Sankey,
    do_coherence_checks: bool = False,
    sheet_to_remove_names: list = None,
):
    '''
    Main converter routine. Calls dedicated routines depending on input type.
    Uses the global variable 'su_trace' to trace the file processing.

    Parameters
    ----------
    :param input_file: Input file name to load (with extension and path).
    :type input_file: string

    :param sankey: Data struct as a Sankey object.
    :type sankey: Sankey, modified

    :param do_coherence_checks: Do we trigger coherence checks on the sankey structure ?
    :type do_coherence_checks: bool

    :param sheet_to_remove_names: List of sheets that will be rewritten or removed when re-exported as excel.
    :type sheet_to_remove_names: list, modified, optional (default=None)

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    '''
    # Read excel input
    excel_file = pd.ExcelFile(input_file)
    # If everything went fine, get sheet names
    excel_sheet_names = excel_file.sheet_names
    # Keep only consistent sheets
    necessary_sheet_names = {}
    unconsistant_sheet_names = []
    use_sheet_to_remove_names = True
    if type(sheet_to_remove_names) is not list:
        use_sheet_to_remove_names = False
    for sheet_name in excel_sheet_names:
        # Get sheet reference name for given sheet name
        is_sheet_consistant, sheet_refkey = _consistantSheetName(sheet_name, sankey.xl_user_converter)
        if is_sheet_consistant:  # Got the reference name
            if sheet_refkey not in necessary_sheet_names:
                necessary_sheet_names[sheet_refkey] = [sheet_name]
            else:
                necessary_sheet_names[sheet_refkey].append(sheet_name)
        else:  # No reference name found
            unconsistant_sheet_names.append(sheet_name)
    # Check if we got some sheets to process
    if len(necessary_sheet_names.keys()) == 0:
        err_msg = "We didn't find any sheet name as specified in the following table : \n"
        err_msg += _allowedSheetNames()
        return False, err_msg
    # Debug log
    su_trace.logger.debug('Names of excel sheets that will be processed : ')
    [su_trace.logger.debug('- {}'.format(_)) for _ in necessary_sheet_names.values()]
    if len(unconsistant_sheet_names) > 0:
        su_trace.logger.debug('Names of excel sheets that will be ignored : ')
        [su_trace.logger.debug('- {}'.format(_)) for _ in unconsistant_sheet_names]
    if use_sheet_to_remove_names:
        if len(sheet_to_remove_names) > 0:
            su_trace.logger.debug('Names of excel sheets that will be removed : ')
            [su_trace.logger.debug('- {}'.format(_)) for _ in sheet_to_remove_names]
    # Update struct
    return _read_sankey_from_excel_book(
        input_file,
        necessary_sheet_names,
        sankey,
        do_coherence_checks=do_coherence_checks)
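
# Usage sketch (assumes a Sankey object can be built with no constructor
# arguments, which this module does not show):
#   sankey = Sankey()
#   ok, msg = load_sankey_from_excel_file('my_study.xlsx', sankey, do_coherence_checks=True)
#   if not ok:
#       su_trace.logger.error(msg)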


def _read_sankey_from_excel_book(
    excel_file_name: str,
    sheet_names: dict,
    sankey: Sankey,
    do_coherence_checks: bool = False
):
    """
    Parse all sheets from an excel book to create a sankey struct.

    Parameters
    ----------
    :param excel_file_name: Name of the input excel file.
    :type excel_file_name: str

    :param sheet_names: Input file worksheet dict as [reference sheet name: user sheet names]
    :type sheet_names: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    :param do_coherence_checks: Do we trigger coherence checks on the sankey structure ?
    :type do_coherence_checks: bool

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    """
    # TODO : useless but I keep it for now
    mfa_dict = {}
    # Verify that we have the minimum number of sheets
    ok, msg = check_sheets_before_reading(sheet_names)
    if not ok:
        return ok, msg
    # First create standardized node type tags if needed
    for _ in (CONST.PRODUCTS_SHEET, CONST.SECTORS_SHEET, CONST.EXCHANGES_SHEET):
        if _ in sheet_names.keys():
            sankey.get_or_create_tagg(
                CONST.NODE_TYPE,
                CONST.TAG_TYPE_NODE,
                ':'.join([
                    CONST.NODE_TYPE_PRODUCT,
                    CONST.NODE_TYPE_SECTOR,
                    CONST.NODE_TYPE_EXCHANGE]))
            break
    # Then check all other TAGS
    if CONST.TAG_SHEET in sheet_names.keys():
        # Read tags
        for tag_sheet_name in sheet_names[CONST.TAG_SHEET]:
            su_trace.logger.info('Reading sheet {}'.format(tag_sheet_name))
            ok, msg = xl_read_tags_sheet(pd.read_excel(excel_file_name, tag_sheet_name), sankey)
            if not ok:
                return ok, "Error on sheet {0} ({1}) : {2}".format(tag_sheet_name, CONST.TAG_SHEET, msg)
            # Log warning messages
            if len(msg) > 0:
                su_trace.logger.error('Warning(s) on sheet {0} ({1}) :'.format(tag_sheet_name, CONST.TAG_SHEET))
                for _ in msg.split('\n'):
                    if len(_) > 0:
                        su_trace.logger.error(' - {}'.format(_))
    # Then check nodes, but in this order
    options = {}
    options['warn_on_new_nodes'] = False
    options['warn_on_new_flux'] = False
    prev_mfa_entry_name = []
    sheets_processing_order = [
        (CONST.NODES_SHEET, xl_read_nodes_sheet, [CONST.NODES_SHEET, options, sankey]),
        (CONST.PRODUCTS_SHEET, xl_read_products_sectors_sheet, [CONST.PRODUCTS_SHEET, options, sankey]),
        (CONST.SECTORS_SHEET, xl_read_products_sectors_sheet, [CONST.SECTORS_SHEET, options, sankey]),
        (CONST.EXCHANGES_SHEET, xl_read_products_sectors_sheet, [CONST.EXCHANGES_SHEET, options, sankey]),
        (CONST.IO_SHEET, xl_read_input_output_sheet, [options, mfa_dict, sankey]),
        (CONST.TER_SHEET, xl_read_terbase_sheet, [options, mfa_dict, sankey]),
        (CONST.DATA_SHEET, xl_read_data_sheet, [options, sankey]),
        (CONST.IO_DATA_SHEET, xl_read_input_output_data_sheet, [options, mfa_dict, sankey]),
        (CONST.MIN_MAX_SHEET, xl_read_min_max_sheet, [options, sankey]),
        (CONST.CONSTRAINTS_SHEET, xl_read_constraints_sheet, [options, sankey]),
        (CONST.RESULTS_SHEET, xl_read_result_sheet, [sankey]),
        # (CONST.ANALYSIS_SHEET, xl_read_analysis_sheet, [mfa_dict, sankey]),
        (CONST.UNCERTAINTY_SHEET, xl_read_uncertainty_sheet, [mfa_dict, sankey]),
        (CONST.CONVERSIONS_SHEET, xl_read_conversions_sheet, [mfa_dict, sankey])
    ]
    # Process all sheets in the correct order if they exist
    for (std_sheet_name, extract_function, args) in sheets_processing_order:
        if std_sheet_name in sheet_names.keys():
            # Warn on new node creation
            if (not options['warn_on_new_nodes']) and (len(prev_mfa_entry_name) > 0):
                options['warn_on_new_nodes'] = \
                    (CONST.NODES_SHEET in prev_mfa_entry_name) or \
                    (CONST.IO_SHEET in prev_mfa_entry_name) or \
                    (CONST.TER_SHEET in prev_mfa_entry_name)
                options['warn_on_new_nodes'] |= \
                    (CONST.PRODUCTS_SHEET in prev_mfa_entry_name) and \
                    (CONST.SECTORS_SHEET in prev_mfa_entry_name) and \
                    (std_sheet_name != CONST.EXCHANGES_SHEET)
            # Warn on new flux creation
            if (not options['warn_on_new_flux']) and (len(prev_mfa_entry_name) > 0):
                options['warn_on_new_flux'] = \
                    (CONST.IO_SHEET in prev_mfa_entry_name) or \
                    (CONST.TER_SHEET in prev_mfa_entry_name) or \
                    (CONST.DATA_SHEET in prev_mfa_entry_name)
            # User sheet name
            for sheet_name in sheet_names[std_sheet_name]:
                # Extract sheet
                excel_sheet = pd.read_excel(excel_file_name, sheet_name)
                # If nothing inside -> continue
                nb_rows = excel_sheet.shape[0]
                if nb_rows < 1:
                    continue
                # Parse
                su_trace.logger.info('Reading sheet {}'.format(sheet_name))
                ok, msg = extract_function(excel_sheet, *args)
                if not ok:
                    return ok, "Error on sheet {0} ({1}) : {2}".format(sheet_name, std_sheet_name, msg)
                # Log warning messages
                if len(msg) > 0:
                    su_trace.logger.error('Warning(s) on sheet {0} ({1}) :'.format(sheet_name, std_sheet_name))
                    for _ in msg.split('\n'):
                        if len(_) > 0:
                            su_trace.logger.error(' - {}'.format(_))
            # Auto-compute missing flux
            if std_sheet_name in [CONST.IO_SHEET, CONST.TER_SHEET, CONST.DATA_SHEET, CONST.RESULTS_SHEET]:
                ok = sankey.autocompute_missing_flux()
                if not ok:
                    return False, ''
            # Node parsing went OK
            prev_mfa_entry_name.append(std_sheet_name)
    # Synchronize all nodes levels
    sankey.autocompute_nodes_levels()
    # if sankey.has_at_least_one_mat_balance():
    # Compute mat balance
    sankey.autocompute_mat_balance()
    # else:
    #     # Recompute mat_balance only if it was specified for at least a node
    #     su_trace.logger.info('Matter balance was not specified in entry file, no computing.')

    # Overall coherence checks
    if do_coherence_checks:
        su_trace.logger.info('Overall coherence checks on Sankey structure')
        ok = sankey.check_overall_sankey_coherence()
        if not ok:
            return False, 'Sankey structure is not coherent. Abort.'
    # End
    return True, ''


def check_sheets_before_reading(sheet_names):
    """
    Verify that there are enough sheets for parsing.

    Parameters
    ----------
    :param sheet_names: Input file worksheet dict as [reference sheet name: user sheet names]
    :type sheet_names: dict

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)

    """
    # With a data sheet, there is enough data to structure the Sankey
    if CONST.DATA_SHEET in sheet_names.keys():
        return True, 'OK - Data sheet'
    # No data sheet -> Do we have a Node sheet ?
    if CONST.NODES_SHEET in sheet_names.keys():
        return True, 'OK - Node sheet'
    # No Node sheet -> Do we have Product & Sector sheets ?
    if (CONST.PRODUCTS_SHEET in sheet_names.keys()) and \
       (CONST.SECTORS_SHEET in sheet_names.keys()):
        return True, 'OK - Products & Sectors sheets'
    # No product & sector sheets -> Do we have an IO sheet ?
    if (CONST.IO_SHEET in sheet_names.keys()):
        return True, 'OK - IO sheets'
    # No IO sheet -> Do we have a TER sheet ?
    if CONST.TER_SHEET in sheet_names.keys():
        return True, 'OK - TER sheet'
    # Not enough sheets
    err_msg = "Not enough sheets. To create the Sankey, we need at least one of these sheets: \n"
    err_msg += _allowedSheetNames([CONST.DATA_SHEET, CONST.NODES_SHEET, CONST.IO_SHEET, CONST.TER_SHEET])
    err_msg += "Or all these sheets instead : \n"
    err_msg += _allowedSheetNames([CONST.PRODUCTS_SHEET, CONST.SECTORS_SHEET])
    return False, err_msg


def xl_read_tags_sheet(
    tags_sheet: pd.DataFrame,
    sankey: Sankey
):
    '''
    Read the tags sheet.

    Parameters
    ----------
    :param tags_sheet: Excel sheet to read.
    :type tags_sheet: pd.DataFrame

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    '''
    # Keep only the first columns. Clean the remaining empty right columns.
    for i, col in enumerate(tags_sheet.columns):  # iterate on column names
        if 'Unnamed' in col:
            tags_sheet.drop(tags_sheet.columns[i:], inplace=True, axis=1)
            break
    # Standardize column names according to the dictionary if they belong to it,
    # otherwise also look them up in the node tags
    tags_sheet.columns = list(map(lambda x: consistantColName(CONST.TAG_SHEET, x, sankey), tags_sheet.columns))
    # Waiting for these columns
    # Obligatory columns to have in the tags sheet, with their default type
    oblig_columns = {CONST.TAG_NAME: '', CONST.TAG_TYPE: '', CONST.TAG_TAGS: ''}
    # Check if we have at least the obligatory columns
    ok, err_msg = _checkNeededColumns(tags_sheet.columns, oblig_columns.keys(), CONST.TAG_SHEET)
    if not ok:
        return ok, err_msg
    # Facultative columns we can have, with default value
    facul_columns = {CONST.TAG_IS_PALETTE: 0, CONST.TAG_COLORMAP: '', CONST.TAG_COLOR: ''}
    # Check if we need to add facultative columns
    for facul_column_name in facul_columns.keys():
        if facul_column_name not in tags_sheet.columns:
            tags_sheet[facul_column_name] = facul_columns[facul_column_name]
    # Convert data to specific types
    ok, msg = _castColumnType(
        tags_sheet, dict(oblig_columns, **facul_columns),
        empty_to_default_value=True)
    if not ok:
        return ok, msg
    # Update Sankey
    return sankey.update_from_tags_table(tags_sheet)


def xl_read_data_sheet(
    data_sheet: pd.DataFrame,
    options: dict,
    sankey: Sankey
):
    '''
    Read the data sheet.

    Parameters
    ----------
    :param data_sheet: Excel sheet to read.
    :type data_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    '''
    # Set column headers consistent with the specified column names for the data sheet
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.DATA_SHEET, x, sankey, tags),
            data_sheet.columns))
    # Waiting for these columns
    # Obligatory columns to have in the data sheet, with their default type
    oblig_columns = {
        CONST.DATA_ORIGIN: '',
        CONST.DATA_DESTINATION: '',
    }
    # Check if we have the mandatory columns (origin, destination)
    ok, msg = _checkNeededColumns(new_columns_names, list(oblig_columns.keys()), CONST.DATA_SHEET)
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    data_sheet.columns = new_columns_names
    # Facultative columns we can have, with default value
    facul_columns = {
        CONST.DATA_VALUE: 0.,
        CONST.DATA_QUANTITY: 0.0,
        CONST.DATA_FACTOR: 0.0,
        CONST.DATA_UNCERT: 0.0}
    # Convert columns data to default data type, or None if NaN
    ok, msg = _castColumnType(
        data_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update Sankey
    return sankey.update_from_data_table(
        data_sheet,
        options['warn_on_new_nodes'],
        options['warn_on_new_flux'])


def xl_read_nodes_sheet(
    nodes_sheet: pd.DataFrame,
    mfa_entry_name: str,
    options: dict,
    sankey: Sankey
):
    """
    Read a nodes sheet.

    Parameters
    ----------
    :param nodes_sheet: Excel sheet to read (dataframe).
    :type nodes_sheet: pd.DataFrame

    :param mfa_entry_name: Type of sheet to parse.
    :type mfa_entry_name: str

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)

    """
    # Standardize column names according to the dictionary,
    # otherwise keep the column names as they are
    tags = list(sankey.taggs[CONST.TAG_TYPE_NODE].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_LEVEL].keys())
    nodes_sheet.columns = list(
        map(lambda x: consistantColName(mfa_entry_name, x, sankey, tags),
            nodes_sheet.columns))
    # Waiting for these columns
    # Obligatory columns to have in the nodes sheet, with their default type
    oblig_columns = {
        CONST.NODES_LEVEL: 0,
        CONST.NODES_NODE: ''}
    # Check if we have at least the obligatory columns
    ok, msg = _checkNeededColumns(nodes_sheet.columns, list(oblig_columns.keys()), mfa_entry_name)
    if not ok:
        return ok, msg
    # Facultative columns we can have, with default value
    facul_columns = {
        CONST.NODES_MAT_BALANCE: 1,
        CONST.NODES_SANKEY: 1,
        CONST.NODES_COLOR: '',
        CONST.NODES_DEFINITIONS: ''}
    # Convert to int, str, or None if NaN
    ok, msg = _castColumnType(
        nodes_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update Sankey
    return sankey.update_from_nodes_table(
        nodes_sheet,
        warn_on_new_nodes=options['warn_on_new_nodes'])


def xl_read_products_sectors_sheet(
    excel_sheet: pd.DataFrame,
    mfa_entry_name: str,
    options: dict,
    sankey: Sankey
):
    """
    Read either a Products, Sectors or Exchanges sheet.

    Parameters
    ----------
    :param excel_sheet: Excel sheet to read (dataframe).
    :type excel_sheet: pd.DataFrame

    :param mfa_entry_name: Type of sheet to parse.
    :type mfa_entry_name: str

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    """
    # Add tag column
    if mfa_entry_name == CONST.PRODUCTS_SHEET:
        excel_sheet[CONST.NODE_TYPE] = CONST.NODE_TYPE_PRODUCT
    elif mfa_entry_name == CONST.SECTORS_SHEET:
        excel_sheet[CONST.NODE_TYPE] = CONST.NODE_TYPE_SECTOR
    elif mfa_entry_name == CONST.EXCHANGES_SHEET:
        excel_sheet[CONST.NODE_TYPE] = CONST.NODE_TYPE_EXCHANGE
    # Read as nodes
    return xl_read_nodes_sheet(
        excel_sheet,
        mfa_entry_name,
        options,
        sankey)


def xl_read_terbase_sheet(
    ter_excel_sheet: pd.DataFrame,
    options: dict,
    mfa_dict: dict,
    sankey: Sankey
):
    """
    Read a TER sheet.

    Parameters
    ----------
    :param ter_excel_sheet: Excel sheet to read (dataframe).
    :type ter_excel_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param mfa_dict: Data struct for Sankey.
    :type mfa_dict: dict, modified

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    """
    # Extract all tables from sheet
    tables = []
    _extractTablesFromSheet(ter_excel_sheet, tables)
    if len(tables) != 2:
        err_msg = 'Could not find or extract the necessary two tables, found {}.\n'.format(len(tables))
        err_msg += 'Are all the tables here and correctly formatted ?'
        return False, err_msg
    # Do we have duplicated cols or rows ?
    for i, table in enumerate(tables):
        has_dup_cols, dup_cols = _hasDuplicatedEntry(table.columns.to_list())
        if has_dup_cols:
            _fuseDuplicatedColumns(table, dup_cols)
        has_dup_rows, dup_rows = _hasDuplicatedEntry(table.index.to_list())
        if has_dup_rows:
            _fuseDuplicatedRows(table, dup_rows)
    # Do we have the same columns and rows for each table ?
    has_missing_entry = False
    msg = ""
    sets_headers = [(set(table.index.to_list()), set(table.columns.to_list())) for table in tables]
    for i in range(len(sets_headers) - 1):
        diff_rows = sets_headers[i][0] - sets_headers[i+1][0]
        if len(diff_rows) > 0:
            has_missing_entry = True
            msg += 'Tables {0} and {1} have incompatible rows : {2}\n'.format(
                i, i+1, list(diff_rows))
        diff_cols = sets_headers[i][1] - sets_headers[i+1][1]
        if len(diff_cols) > 0:
            has_missing_entry = True
            msg += 'Tables {0} and {1} have incompatible columns : {2}\n'.format(
                i, i+1, list(diff_cols))
    if has_missing_entry:
        return False, msg
    # Separate tables
    table_supplies = tables[0]  # Defines flux Sectors->Products, with Cols=Sectors, Rows=Products
    table_uses = tables[1]  # Defines flux Products->Sectors, with Cols=Sectors, Rows=Products
    # In Sankey struct
    log = ''
    ok, msg = sankey.update_from_matrix_table(
        table_supplies.T.replace({np.nan: None}),
        warn_on_new_nodes=options['warn_on_new_nodes'],
        warn_on_new_flux=options['warn_on_new_flux'],
        tagg_name='Type de noeud',
        tagg_type=CONST.TAG_TYPE_NODE,
        tag_name_col=CONST.NODE_TYPE_PRODUCT,
        tag_name_row=CONST.NODE_TYPE_SECTOR)
    if not ok:
        err = 'Could not process supplies table : {}'.format(msg)
        return ok, err
    log += msg
    ok, msg = sankey.update_from_matrix_table(
        table_uses.replace({np.nan: None}),
        warn_on_new_nodes=options['warn_on_new_nodes'],
        warn_on_new_flux=options['warn_on_new_flux'],
        tagg_name='Type de noeud',
        tagg_type=CONST.TAG_TYPE_NODE,
        tag_name_col=CONST.NODE_TYPE_SECTOR,
        tag_name_row=CONST.NODE_TYPE_PRODUCT)
    log += msg
    if not ok:
        err = 'Could not process use table : {}'.format(msg)
        return ok, err
    # Set MFA dict - Needed for retrocompatibility
    # Set 'x' and 'X' as 1
    table_uses.replace({'x': 1}, inplace=True)
    table_uses.replace({'X': 1}, inplace=True)
    table_supplies.replace({'x': 1}, inplace=True)
    table_supplies.replace({'X': 1}, inplace=True)
    # Default type = int
    _castColumnType(table_uses, 0, empty_to_default_value=True)
    _castColumnType(table_supplies, 0, empty_to_default_value=True)
    # Save in MFA dict
    mfa_dict[CONST.TER_SHEET] = {}
    mfa_dict[CONST.TER_SHEET]['use'] = table_uses
    mfa_dict[CONST.TER_SHEET]['supply'] = table_supplies
    return True, log


def xl_read_input_output_sheet(
    io_excel_sheet: pd.DataFrame,
    options: dict,
    mfa_input: dict,
    sankey: Sankey,
    read_data_in_matrix=False
):
    """
    Read an IO sheet.

    Parameters
    ----------
    :param io_excel_sheet: Excel sheet to read (dataframe).
    :type io_excel_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param mfa_input: Data struct for Sankey.
    :type mfa_input: dict, modified

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    """
    # Extract all tables from sheet
    tables = []
    _extractTablesFromSheet(io_excel_sheet, tables)
    if len(tables) != 1:
        err_msg = 'Did not find the correct amount of tables. Need one table, found {}.'.format(len(tables))
        if len(tables) == 0:
            err_msg += '\nIs the table in the given sheet or correctly formatted ?'
        return False, err_msg
    io_sheet = tables[0]
    # Do we have duplicated cols or rows ?
    has_dup_cols, dup_cols = _hasDuplicatedEntry(io_sheet.columns.to_list())
    if has_dup_cols:
        _fuseDuplicatedColumns(io_sheet, dup_cols)
    has_dup_rows, dup_rows = _hasDuplicatedEntry(io_sheet.index.to_list())
    if has_dup_rows:
        _fuseDuplicatedRows(io_sheet, dup_rows)
    # In Sankey struct
    ok, msg = sankey.update_from_matrix_table(
        io_sheet.replace({np.nan: None}),
        data_in_matrix=read_data_in_matrix,
        warn_on_new_nodes=options['warn_on_new_nodes'],
        warn_on_new_flux=options['warn_on_new_flux'])
    # Update MFA data dict - Needed for retrocompatibility
    # Set 'x' and 'X' as 1
    io_sheet.replace({'x': 1}, inplace=True)
    io_sheet.replace({'X': 1}, inplace=True)
    # Default type = int
    _castColumnType(io_sheet, 0, empty_to_default_value=False)
    # Save in MFA dict
    mfa_input[CONST.IO_SHEET] = io_sheet
    # Output
    return ok, msg
|
1351
|
+
|
1352
|
+
|
1353
|
+
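_hasDuplicatedEntry, _fuseDuplicatedColumns and _fuseDuplicatedRows are private helpers defined earlier in this module; their exact behavior is not shown in this part of the diff. A minimal sketch of the idea, assuming that "fusing" merges same-labelled columns by summation:

# Illustrative sketch under stated assumptions, not package code.
import pandas as pd

df = pd.DataFrame([[1, 2, 3]], columns=['a', 'b', 'a'])
labels = df.columns.to_list()
dups = sorted({x for x in labels if labels.count(x) > 1})
if dups:
    # Grouping the transposed frame by its index merges columns
    # that share a label.
    df = df.T.groupby(level=0).sum().T
print(df)  # column 'a' now holds 1 + 3 = 4
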
def xl_read_input_output_data_sheet(
    io_excel_sheet: pd.DataFrame,
    options: dict,
    mfa_input: dict,
    sankey: Sankey
):
    """
    Read IO data sheet.

    Same as :func:`xl_read_input_output_sheet`, but flux values are read
    directly from the matrix cells.

    Parameters
    ----------
    :param io_excel_sheet: Excel sheet to read
    :type io_excel_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param mfa_input: Data struct for Sankey
    :type mfa_input: dict, modified

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    return xl_read_input_output_sheet(
        io_excel_sheet,
        options,
        mfa_input,
        sankey,
        read_data_in_matrix=True)

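The only difference between the two readers is what the matrix cells mean. An illustrative sketch of the two sheet flavors (labels made up):

# Illustrative sketch, not package code.
import pandas as pd

# Structure-only IO sheet: 'x' marks that a flux exists.
io_structure = pd.DataFrame(
    [['x', None]], index=['product A'], columns=['sector X', 'sector Y'])

# IO data sheet: same layout, but cells carry flux values, which
# xl_read_input_output_data_sheet forwards with read_data_in_matrix=True.
io_data = pd.DataFrame(
    [[12.5, None]], index=['product A'], columns=['sector X', 'sector Y'])
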
def xl_read_min_max_sheet(
    min_max_sheet: pd.DataFrame,
    options: dict,
    sankey: Sankey
):
    """
    Read CONST.MIN_MAX_SHEET.

    Parameters
    ----------
    :param min_max_sheet: Excel sheet to read
    :type min_max_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    # Set column headers consistent with tag groups
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.MIN_MAX_SHEET, x, sankey, tags),
            min_max_sheet.columns))
    # Expected columns
    # Obligatory columns to have in the sheet, with their default type
    oblig_columns = {
        CONST.MIN_MAX_ORIGIN: '',
        CONST.MIN_MAX_DESTINATION: ''}
    # Are all columns here?
    ok, msg = _checkNeededColumns(new_columns_names, list(oblig_columns.keys()), CONST.MIN_MAX_SHEET)
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    min_max_sheet.columns = new_columns_names
    # Facultative columns we can have, with default value
    facul_columns = {}
    for tag in tags:
        facul_columns[tag] = ''
    # Convert to int, str, or None if NaN
    ok, msg = _castColumnType(
        min_max_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update sankey struct
    ok, msg = sankey.update_from_min_max_table(
        min_max_sheet,
        options['warn_on_new_nodes'],
        options['warn_on_new_flux'])
    if not ok:
        return ok, msg
    return True, ''

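_castColumnType is a private helper used throughout these readers but defined earlier in the file. A rough standalone sketch of the contract the callers appear to rely on (cast each listed column to the type of its default value, with NaN becoming None); this is an assumption, not the package's implementation:

# Illustrative sketch under stated assumptions, not package code.
import numpy as np
import pandas as pd

def cast_columns_with_defaults(df, defaults):
    # For each listed column: NaN -> None, other values -> type of default.
    for col, default in defaults.items():
        if col not in df.columns:
            continue
        df[col] = [
            None if (isinstance(v, float) and np.isnan(v))
            else type(default)(v)
            for v in df[col]]

sheet = pd.DataFrame({'origin': ['a', np.nan], 'destination': ['b', 'c']})
cast_columns_with_defaults(sheet, {'origin': '', 'destination': ''})
print(sheet)  # NaN in 'origin' becomes None; strings stay strings
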
def xl_read_constraints_sheet(
    constraints_sheet: pd.DataFrame,
    options: dict,
    sankey: Sankey
):
    """
    Read CONST.CONSTRAINTS_SHEET.

    Parameters
    ----------
    :param constraints_sheet: Excel sheet to read
    :type constraints_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    # Set column headers consistent with tag groups
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.CONSTRAINTS_SHEET, x, sankey, tags),
            constraints_sheet.columns))
    # Expected columns
    # Obligatory columns to have in the sheet, with their default type
    oblig_columns = {
        CONST.CONSTRAINT_ID: '',
        CONST.CONSTRAINT_ORIGIN: '',
        CONST.CONSTRAINT_DESTINATION: ''}
    onlyone_columns = {
        CONST.CONSTRAINT_EQ: 0.0,
        CONST.CONSTRAINT_INEQ_INF: 0.0,
        CONST.CONSTRAINT_INEQ_SUP: 0.0}
    # Are all columns here?
    ok, msg = _checkNeededColumns(
        new_columns_names,
        list(oblig_columns.keys()),
        CONST.CONSTRAINTS_SHEET,
        list(onlyone_columns.keys()))
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    constraints_sheet.columns = new_columns_names
    # Facultative columns we can have, with default value
    facul_columns = {}
    for tag in tags:
        facul_columns[tag] = ''
    # Convert columns data to default data type, or None if NaN
    ok, msg = _castColumnType(
        constraints_sheet, dict(oblig_columns, **onlyone_columns, **facul_columns))
    if not ok:
        return ok, msg
    ok, msg = sankey.update_from_constraints_table(
        constraints_sheet,
        options['warn_on_new_nodes'],
        options['warn_on_new_flux'])
    if not ok:
        return ok, msg
    return True, ''

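For orientation, a constraints table of the shape this reader expects might look like the sketch below. The column labels are illustrative; the real ones come from io_excel_constants and are normalized through consistantColName before the check:

# Illustrative sketch, not package code.
import pandas as pd

# Hypothetical raw sheet: an id, origin/destination node names, and one
# of the eq / ineq columns carrying the constraint value per row.
constraints = pd.DataFrame({
    'id': [1, 2],
    'origin': ['farming', 'farming'],
    'destination': ['food industry', 'export'],
    'eq': [100.0, None],
    'ineq inf': [None, 10.0],
})
print(constraints)
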
def xl_read_result_sheet(
    result_sheet: pd.DataFrame,
    sankey: Sankey
):
    '''
    Read result sheet.

    Parameters
    ----------
    :param result_sheet: Excel sheet to read
    :type result_sheet: pd.DataFrame

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    '''
    # Set column headers consistent with specified column names for data sheet
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.RESULTS_SHEET, x, sankey, tags),
            result_sheet.columns))
    # Expected columns
    # Obligatory columns to have in the sheet, with their default type
    oblig_columns = {
        CONST.RESULTS_ORIGIN: '',
        CONST.RESULTS_DESTINATION: '',
        CONST.RESULTS_VALUE: 0.}
    # Check if we have the mandatory columns (origin, destination, value)
    ok, msg = _checkNeededColumns(
        new_columns_names,
        list(oblig_columns.keys()),
        CONST.RESULTS_SHEET)
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    result_sheet.columns = new_columns_names
    # Facultative columns we can have, with default value
    facul_columns = {
        CONST.RESULTS_FREE_MIN: 0.0,
        CONST.RESULTS_FREE_MAX: 0.0}
    # Convert columns data to default data type, or None if NaN
    ok, msg = _castColumnType(
        result_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update Sankey
    return sankey.update_from_result_table(result_sheet)

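consistantColName (defined earlier in this module) maps free-form user headers onto the canonical names expected above. A minimal sketch of that kind of normalization, assuming accent-stripping and case folding; the real function also resolves tag-group names:

# Illustrative sketch under stated assumptions, not package code.
from unidecode import unidecode

def normalize_header(header: str) -> str:
    # Lower-case, strip accents and surrounding blanks so that
    # 'Valeur ', 'valeur' and 'Valéur' compare equal.
    return unidecode(str(header)).strip().lower()

print(normalize_header('Destinatión '))  # -> 'destination'
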
def xl_read_analysis_sheet(
    analysis_sheet: pd.DataFrame,
    mfa_dict: dict,
    sankey: Sankey
):
    """
    Read Analysis sheet.

    Parameters
    ----------
    :param analysis_sheet: Excel sheet to read
    :type analysis_sheet: pd.DataFrame

    :param mfa_dict: MFA data after parsing
    :type mfa_dict: dict, modified

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    # Set column headers consistent with tag groups
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.ANALYSIS_SHEET, x, sankey, tags),
            analysis_sheet.columns))
    # Expected columns
    # Obligatory columns to have in the sheet, with their default type
    oblig_columns = {
        CONST.RESULTS_ORIGIN: '',
        CONST.RESULTS_DESTINATION: '',
        CONST.RESULTS_VALUE: 0.0}
    # Are all columns here?
    ok, msg = _checkNeededColumns(
        new_columns_names,
        list(oblig_columns.keys()),
        CONST.ANALYSIS_SHEET)
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    analysis_sheet.columns = new_columns_names
    # Facultative columns we can have, with default value
    facul_columns = {}
    for tag in tags:
        facul_columns[tag] = ''
    # Convert columns data to default data type, or None if NaN
    ok, msg = _castColumnType(
        analysis_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update Sankey - analysis part
    ok, msg = sankey.update_from_analysis_table(
        analysis_sheet)
    if not ok:
        return ok, msg
    # Update MFA data dict
    mfa_dict[CONST.ANALYSIS_SHEET] = analysis_sheet
    return True, ''

def xl_read_uncertainty_sheet(
    uncertainty_sheet: pd.DataFrame,
    mfa_dict: dict,
    sankey: Sankey
):
    """
    Read UNCERTAINTY SHEET.

    Parameters
    ----------
    :param uncertainty_sheet: Excel sheet to read
    :type uncertainty_sheet: pd.DataFrame

    :param mfa_dict: MFA data after parsing
    :type mfa_dict: dict, modified

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    # Filter out empty columns
    for i, col in enumerate(uncertainty_sheet.columns):  # iterate over column names
        if 'Unnamed' in col:
            # Drop this column and every column that follows it
            uncertainty_sheet.drop(uncertainty_sheet.columns[i:], inplace=True, axis=1)
            break
    # Set column headers consistent with tag groups
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.UNCERTAINTY_SHEET, x, sankey, tags),
            uncertainty_sheet.columns))
    # Expected columns
    # Obligatory columns to have in the sheet, with their default type
    oblig_columns = {
        CONST.UNCERTAINTY_ORIGIN: '',
        CONST.UNCERTAINTY_DESTINATION: ''}
    # Are all columns here?
    ok, msg = _checkNeededColumns(
        new_columns_names,
        list(oblig_columns.keys()),
        CONST.UNCERTAINTY_SHEET)
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    uncertainty_sheet.columns = new_columns_names
    # Facultative columns we can have, with default value and default position in sheet
    facul_columns = {}
    facul_column_pos = 2
    for col_name in CONST.UNCERTAINTY_SHEET_COLS:
        facul_columns['{}'.format(col_name)] = {'val': 0.0, 'pos': facul_column_pos}
        facul_column_pos += 1
    for tag in tags:
        facul_columns[tag] = {'val': '', 'pos': facul_column_pos}
        facul_column_pos += 1
    # Check if we need to add facultative columns
    for facul_column_name, facul_column in facul_columns.items():
        if facul_column_name not in uncertainty_sheet.columns:
            uncertainty_sheet.insert(
                facul_column['pos'], facul_column_name, facul_column['val'])
    # Convert to int, str, or None if NaN
    ok, msg = _castColumnType(
        uncertainty_sheet,
        dict(oblig_columns, **facul_columns),
        empty_to_default_value=True)
    if not ok:
        return ok, msg
    # Update Sankey - uncertainty part
    ok, msg = sankey.update_from_uncertainty_table(
        uncertainty_sheet)
    if not ok:
        return ok, msg
    mfa_dict[CONST.UNCERTAINTY_SHEET] = uncertainty_sheet
    return True, ''

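pandas labels blank headers 'Unnamed: N', so the pre-filter above treats everything from the first blank header onward as trailing scratch space and drops it, including any named columns that come after. A standalone sketch of that behavior:

# Illustrative sketch, not package code.
import pandas as pd

df = pd.DataFrame([[1, 2, 3]], columns=['origin', 'Unnamed: 1', 'notes'])
for i, col in enumerate(df.columns):
    if 'Unnamed' in col:
        # Everything from the first blank header onward is dropped,
        # including named columns that follow it.
        df.drop(df.columns[i:], inplace=True, axis=1)
        break
print(df.columns.to_list())  # ['origin']
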
def xl_read_conversions_sheet(
    conversions_sheet: pd.DataFrame,
    mfa_dict: dict,
    sankey: Sankey
):
    """
    Read CONVERSIONS SHEET.
    TODO this sheet must be changed.

    Parameters
    ----------
    :param conversions_sheet: Excel sheet to read
    :type conversions_sheet: pd.DataFrame

    :param mfa_dict: MFA data after parsing
    :type mfa_dict: dict, modified

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    # Set column headers consistent with tag groups
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.CONVERSIONS_SHEET, x, sankey),
            conversions_sheet.columns))
    # Expected columns
    oblig_columns = {
        CONST.CONVERSIONS_LOCATION: '',
        CONST.CONVERSIONS_PRODUCT: '',
        CONST.CONVERSIONS_NATURAL_UNIT: '',
        CONST.CONVERSIONS_FACTOR: 0.0}
    # Are all columns here?
    ok, msg = _checkNeededColumns(new_columns_names, list(oblig_columns.keys()), CONST.CONVERSIONS_SHEET)
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    conversions_sheet.columns = new_columns_names
    # # Facultative columns we can have, with default value
    # facul_columns = {
    #     CONST.CONVERSIONS_FACTOR_INV: 0.0}
    # # Convert columns data to default data type, or None if NaN
    # ok, msg = _castColumnType(
    #     conversions_sheet.iloc[1:], dict(oblig_columns, **facul_columns))
    # if not ok:
    #     return ok, msg
    conversions_sheet.replace({np.nan: None}, inplace=True)
    # Update Sankey - conversions part
    nodes = []
    ok, msg = sankey.update_from_conversions_table(conversions_sheet, nodes)
    if not ok:
        return ok, msg
    # Update MFA data dict
    nodes2tooltips = {}
    nodes2units_conv = {}
    nodes2natural_unit = {}
    for node in nodes:
        for localisation in node.unit.keys():
            name = localisation + '/' + node.name
            node2tooltips = []
            node2units_conv = [1.0]
            for tooltip in sankey.tooltips.keys():
                if tooltip in node.tooltips.keys():
                    node2tooltips.append(node.tooltips[tooltip].content)
                else:
                    node2tooltips.append(None)
            for unit in sankey.units.keys():
                other_factors = node.get_other_factors(localisation)
                try:
                    node2units_conv.append(other_factors[unit])
                except Exception:
                    node2units_conv.append(None)
            nodes2tooltips[name] = node2tooltips
            nodes2units_conv[name] = node2units_conv
            nodes2natural_unit[name] = node.get_natural_unit(localisation)
    mfa_dict[CONST.CONVERSIONS_SHEET] = {
        'tooltip_names': [[name, desc] for name, desc in sankey.tooltips.items()],
        'units_names': [[name, desc] for name, desc in sankey.units.items()],
        'nodes2tooltips': nodes2tooltips,
        'nodes2units_conv': nodes2units_conv,
        'nodes2natural_unit': nodes2natural_unit}
    return True, ''

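The dictionaries built above are keyed 'localisation/node_name', and each conversion list starts with the identity factor 1.0 for the node's natural unit followed by one factor (or None) per known unit. A tiny illustration of the resulting shape, with made-up values:

# Illustrative data shape, not real output.
mfa_conversions = {
    'tooltip_names': [['source', 'Data source']],
    'units_names': [['kt', 'kilotonnes'], ['M€', 'millions of euros']],
    'nodes2tooltips': {'France/wheat': ['INSEE']},
    'nodes2units_conv': {'France/wheat': [1.0, 0.001, None]},
    'nodes2natural_unit': {'France/wheat': 't'},
}
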
def write_excel_from_sankey(
    excel_filename: str,
    sankey: Sankey,
    mode: str = 'a',
    sheets_to_remove__names: list = [],
    **kwargs
):
    """
    Write a Sankey structure to an Excel file.

    Parameters
    ----------
    :param excel_filename: Name of Excel file to write
    :type excel_filename: str

    :param sankey: Sankey structure to write to Excel file
    :type sankey: Sankey

    Optional parameters
    -------------------
    :param mode: Writing mode (see pandas.ExcelWriter for more infos)
    :type mode: str, optional (defaults to 'a')

    :param sheets_to_remove__names: List of sheets (by name) to remove from Excel file if they are present
    :type sheets_to_remove__names: list[str, ...], optional (defaults to [])

    Hidden parameters
    -----------------
    :param additional_sheets: Dict of tables (pandas.DataFrame) to add in Excel file
    :type additional_sheets: Dict{str: pandas.DataFrame}
    """
    # Post-process function
    def _post_process_excel_file(
        excel_file
    ):
        # Extract excel book
        excel = excel_file.book
        sheets = excel._sheets
        # Remove sheets
        for sheet_to_remove__name in sheets_to_remove__names:
            try:
                sheet_to_remove__id = sheets.index(excel[sheet_to_remove__name])
                sheets.pop(sheet_to_remove__id)
            except Exception:
                pass
        # Read-me sheet must always be the first sheet
        try:
            read_me_sheet__id = sheets.index(excel['READ ME'])
            sheet = sheets.pop(read_me_sheet__id)
            sheets.insert(0, sheet)
        except Exception:
            pass
        # File is open and saved by xlwings to activate the formulas.
        # if has_xl_wings:
        #     try:
        #         app = xl.App(visible=False)
        #         book = app.books.open(excel_filename)
        #         book.save()
        #         app.kill()
        #     except Exception:
        #         pass
    # Write sheets from sankey
    if mode == 'a':
        with pd.ExcelWriter(excel_filename, engine='openpyxl', mode=mode, if_sheet_exists='replace') as excel_file:
            sankey.write_in_excel_file(excel_file, **kwargs)
            _post_process_excel_file(excel_file)
    else:
        with pd.ExcelWriter(excel_filename, engine='openpyxl', mode=mode) as excel_file:
            sankey.write_in_excel_file(excel_file, **kwargs)
            _post_process_excel_file(excel_file)
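A minimal usage sketch, given a Sankey object produced by the parsing functions above; the file name and the sheet name to remove are placeholders:

# Hypothetical usage, not package code. `sankey` is assumed to have been
# filled by the xl_read_* functions above.
write_excel_from_sankey(
    'my_sankey.xlsx',
    sankey,
    mode='a',
    sheets_to_remove__names=['Draft'])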