SankeyExcelParser 1.0.0b0__py3-none-any.whl
- SankeyExcelParser/__init__.py +0 -0
- SankeyExcelParser/io_excel.py +1867 -0
- SankeyExcelParser/io_excel_constants.py +811 -0
- SankeyExcelParser/sankey.py +3138 -0
- SankeyExcelParser/sankey_utils/__init__.py +0 -0
- SankeyExcelParser/sankey_utils/data.py +1118 -0
- SankeyExcelParser/sankey_utils/excel_source.py +31 -0
- SankeyExcelParser/sankey_utils/flux.py +344 -0
- SankeyExcelParser/sankey_utils/functions.py +278 -0
- SankeyExcelParser/sankey_utils/node.py +340 -0
- SankeyExcelParser/sankey_utils/protos/__init__.py +0 -0
- SankeyExcelParser/sankey_utils/protos/flux.py +84 -0
- SankeyExcelParser/sankey_utils/protos/node.py +386 -0
- SankeyExcelParser/sankey_utils/protos/sankey_object.py +135 -0
- SankeyExcelParser/sankey_utils/protos/tag_group.py +95 -0
- SankeyExcelParser/sankey_utils/sankey_object.py +165 -0
- SankeyExcelParser/sankey_utils/table_object.py +37 -0
- SankeyExcelParser/sankey_utils/tag.py +95 -0
- SankeyExcelParser/sankey_utils/tag_group.py +206 -0
- SankeyExcelParser/su_trace.py +239 -0
- SankeyExcelParser/tests/integration/__init__.py +0 -0
- SankeyExcelParser/tests/integration/test_base.py +356 -0
- SankeyExcelParser/tests/integration/test_run_check_input.py +100 -0
- SankeyExcelParser/tests/integration/test_run_conversions.py +96 -0
- SankeyExcelParser/tests/integration/test_run_load_input.py +94 -0
- SankeyExcelParser/tests/unit/__init__.py +0 -0
- SankeyExcelParser-1.0.0b0.data/scripts/run_parse_and_write_excel.py +155 -0
- SankeyExcelParser-1.0.0b0.data/scripts/run_parse_excel.py +115 -0
- SankeyExcelParser-1.0.0b0.dist-info/METADATA +113 -0
- SankeyExcelParser-1.0.0b0.dist-info/RECORD +32 -0
- SankeyExcelParser-1.0.0b0.dist-info/WHEEL +5 -0
- SankeyExcelParser-1.0.0b0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1867 @@
"""
This module is dedicated to the conversion from outside formats to the internal
json format. Outside formats may be: a workbook (excel), another json file, a
database, etc.

Structure and specifications of the internal json format are defined in this
module. The internal json format can take two main forms: one to address input
information and a second one for output communications.
"""

# External libs -----------------------------------------------------
import pandas as pd
import numpy as np
import re

# Local libs -------------------------------------------------------
import SankeyExcelParser.io_excel_constants as CONST
import SankeyExcelParser.su_trace as su_trace

# External modules -------------------------------------------------
from unidecode import unidecode

# Local modules -----------------------------------------------------
from SankeyExcelParser.sankey import Sankey, UserExcelConverter

# has_xl_wings = True
# try:
#     # import xlwings as xl
#     import pythoncom
#     pythoncom.CoInitialize()
# except Exception:
#     has_xl_wings = False


# Private functions ----------------------------------------------------------------
def _compareStrings(
    string_in: str,
    string_ref: str,
    strip_input_string=False
):
    """
    Uniformize strings for easier comparison.

    Parameters
    ----------
    :param string_in: String to compare.
    :type string_in: str

    :param string_ref: Reference string to compare with (used as a regex pattern).
    :type string_ref: str

    :param strip_input_string: Remove spaces at the start / end of the input string.
    :type strip_input_string: boolean, optional (default=False)

    Returns
    -------
    :return: True if the strings mean the same thing, False otherwise.
    :rtype: bool
    """
    s1 = string_in.lower()
    s2 = string_ref.lower()
    if strip_input_string:
        s1 = s1.strip()
    return (re.fullmatch(unidecode(s2), unidecode(s1)) is not None)
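
# A minimal usage sketch of `_compareStrings` (illustrative values only).
# Note that `string_ref` is used as a regular-expression pattern and both
# strings are unaccented with `unidecode` before matching:
#
#   >>> _compareStrings('Énergie ', 'energie', strip_input_string=True)
#   True
#   >>> _compareStrings('energy', 'energie')
#   False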


def _consistantColName(
    sheet_name: str,
    usr_col_name: str,
    xl_names_converter: UserExcelConverter,
    tags: list = []
):
    '''
    Test if usr_col_name is consistent with the allowed columns list.

    Parameters
    ----------
    :param sheet_name: Sheet name to check.
    :type sheet_name: string

    :param usr_col_name: Column to find.
    :type usr_col_name: string

    :param xl_names_converter: Keeps track of the mapping between user names and standard names.
    :type xl_names_converter: UserExcelConverter, modified

    :param tags: Tags list to check.
    :type tags: list

    Returns
    -------
    :return:
        If the column corresponds to an entry in the sheet name dictionary, the result is the corresponding key.
        If the column is a tag column / an additional column, the result is the standard format of the column name.
    :rtype: (bool, string)
    '''
    # Check if sheet is about data
    if _compareStrings(sheet_name, 'flux data', strip_input_string=True):
        xl_names_converter.add_new_col(sheet_name, CONST.DATA_SHEET, usr_col_name)
        return True, CONST.DATA_SHEET
    sheet_name_lower = sheet_name.lower()
    usr_col_name_lower = usr_col_name.lower()
    if sheet_name_lower != '' and usr_col_name_lower != '':
        # Is the proposed column a tag column?
        for tag in tags:
            if _compareStrings(usr_col_name_lower, tag, strip_input_string=True):
                return True, tag
        # Is the proposed column in the allowed columns?
        for std_col_name in CONST.DICT_OF_COLS_NAMES__RE[sheet_name_lower].keys():
            for allowed_col_re in CONST.DICT_OF_COLS_NAMES__RE[sheet_name_lower][std_col_name]:
                if _compareStrings(usr_col_name_lower, allowed_col_re, strip_input_string=True):
                    xl_names_converter.add_new_col(sheet_name_lower, std_col_name, usr_col_name)
                    return True, std_col_name
    return False, usr_col_name


def _consistantSheetName(
    usr_sheet_name: str,
    xl_names_converter: UserExcelConverter,
):
    '''
    Test if usr_sheet_name is consistent with the allowed sheets list.

    Parameters
    ----------
    :param usr_sheet_name: Sheet name to check.
    :type usr_sheet_name: string

    :param xl_names_converter: Keeps track of the mapping between user names and standard names.
    :type xl_names_converter: UserExcelConverter, modified

    Returns
    -------
    :return:
        - out1: True if the tested sheet is consistent.
        - out2: The dictionary key corresponding to the allowed list found, if the tested
          sheet is consistent. The list of allowed sheet names if not.
    :rtype: (bool, string)

    Notes
    -----
    - If the usr_sheet_name input is empty ('') the result is a list of
      allowed sheet names as a string.
    - A particular case is taken into account for proxy input files, which
      usually have 3 proxy sheets (one of them with the 'sector' keyword in its name).
    '''
    # Check if sheet is about data
    if _compareStrings(usr_sheet_name, 'flux data', strip_input_string=True):
        xl_names_converter.add_new_sheet(CONST.DATA_SHEET, usr_sheet_name)
        return True, CONST.DATA_SHEET
    # If we have a sheet to check
    if usr_sheet_name != '':
        # Is the sheet in the list of possible sheet names?
        for std_sheet_name in CONST.DICT_OF_SHEET_NAMES__RE.keys():
            for allow_sheet_re in CONST.DICT_OF_SHEET_NAMES__RE[std_sheet_name]:
                if _compareStrings(usr_sheet_name, allow_sheet_re, strip_input_string=True):
                    xl_names_converter.add_new_sheet(std_sheet_name, usr_sheet_name)
                    return True, std_sheet_name
    # We did not find the corresponding key
    return False, _allowedSheetNames()


def _allowedSheetNames(sheets_to_show=[]):
    '''
    Return the table of allowed sheet names with respect to their type of information.

    Parameters
    ----------
    :param sheets_to_show: List of sheets to print. If the list is empty, print all.
    :type sheets_to_show: list, optional, default=[]

    Returns
    -------
    :return: Formatted table of sheet types and their possible sheet names.
    :rtype: string
    '''
    wcol1 = 30
    wcol2 = 70
    # Create table header
    list_allowed = '{0: <{w1}} | {1: <{w2}}\n'.format("Sheet type", "Possible sheet names", w1=wcol1, w2=wcol2)
    list_allowed += '-'*(wcol1 + wcol2 + 3) + '\n'
    # Keys to show = table first column
    if len(sheets_to_show) > 0:
        list_dict_keys = [_ for _ in sheets_to_show if _ in CONST.DICT_OF_SHEET_NAMES.keys()]
    else:
        list_dict_keys = CONST.DICT_OF_SHEET_NAMES.keys()
    # Create table
    for dict_key in list_dict_keys:
        list_allowed += '{: <{w}} | '.format(dict_key, w=wcol1)
        if len(CONST.DICT_OF_SHEET_NAMES[dict_key]) != 0:
            list_allowed += ', '.join(set(CONST.DICT_OF_SHEET_NAMES[dict_key]))
        list_allowed += '\n'
    return list_allowed
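
# The returned string is a small plain-text table. A hypothetical rendering
# (the actual rows depend on CONST.DICT_OF_SHEET_NAMES) could look like:
#
#   Sheet type                     | Possible sheet names
#   -------------------------------------------------------------------------
#   data                           | data, flux data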


def _checkNeededColumns(
    columns: list,
    columns_needed: list,
    sheet_name: str,
    columns_needed_onlyone: list = []
):
    """
    Check that a sheet's columns contain all the needed columns.

    Parameters
    ----------
    :param columns: Current list of columns.
    :type columns: list

    :param columns_needed: List of columns to have.
    :type columns_needed: list

    :param sheet_name: Sheet name from which to check names.
    :type sheet_name: str

    :param columns_needed_onlyone: List of columns in which at least one is needed.
    :type columns_needed_onlyone: list

    Returns
    -------
    :return: (Success?, Log message)
    :rtype: (bool, str)
    """
    # Check needed columns
    for column_needed in columns_needed:
        if not (column_needed in columns):
            err_msg = 'The \"{}\" column is missing '.format(column_needed)
            err_msg += 'or does not have the correct name.\n'
            err_msg += '\n'
            err_msg += '{}\n'.format(CONST.DICT_OF_COMMENTS[sheet_name][column_needed][0])
            err_msg += '\n'
            std_column_names = set(CONST.DICT_OF_COLS_NAMES[sheet_name][column_needed])
            err_msg += 'Acceptable names for this column : {}'.format(
                ', '.join(['\"{}\"'.format(_) for _ in std_column_names]))
            return False, err_msg
    # Check optional columns (need only one of them)
    if len(columns_needed_onlyone) > 0:
        if (not any(np.in1d(columns_needed_onlyone, columns))):
            err_msg = 'A mandatory column is missing or does not have the correct name.\n'
            err_msg += 'At least one of these columns must be present : {}'.format(
                ', '.join(['\"{}\"'.format(_) for _ in columns_needed_onlyone]))
            return False, err_msg
    return True, ''
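
# A minimal sketch of the success path (made-up column names; the failure
# path additionally needs the CONST dictionaries to build its error message):
#
#   >>> _checkNeededColumns(['origin', 'destination', 'value'],
#   ...                     ['origin', 'destination'], 'data')
#   (True, '')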


def _castColumnType(
    sheet: pd.DataFrame,
    columns_types,
    empty_to_default_value=False
):
    """
    Cast specific columns to their expected types / default values.

    Parameters
    ----------
    :param sheet: Sheet to modify.
    :type sheet: pandas.DataFrame, modified

    :param columns_types: Dict of columns and their default types/values, OR any default value.
    :type columns_types: any

    :param empty_to_default_value: If True, set empty cells to the default value, otherwise set them to None.
    :type empty_to_default_value: bool

    Returns
    -------
    :return: (Success? ; Log message)
    :rtype: (bool, str)
    """
    # Filter columns to convert / columns that are in the sheet
    if type(columns_types) is dict:
        cols_to_convert = \
            [(_, columns_types[_]) for _ in columns_types.keys() if _ in sheet.columns]
    else:
        cols_to_convert = \
            [(_, columns_types) for _ in sheet.columns]
    # Convert
    for (col, _) in cols_to_convert:
        try:
            # Special type
            if type(_) is dict:
                val = _['val']
            else:
                val = _
            # Convert as string
            if type(val) is str:
                sheet[col] = sheet[col].replace({np.nan: 'None'})
                sheet[col] = sheet[col].astype(str)
                if empty_to_default_value:
                    sheet[col] = sheet[col].replace({'None': val})
                else:
                    sheet[col] = sheet[col].replace({'None': None})
            # Convert as float
            elif type(val) is float:
                sheet[col] = sheet[col].astype(float)
                if empty_to_default_value:
                    sheet[col] = sheet[col].replace({np.nan: val})
                else:
                    sheet[col] = sheet[col].replace({np.nan: None})
            # Convert as int
            elif type(val) is int:
                # Arbitrary sentinel to keep track of empty cells through the int cast
                sheet[col] = sheet[col].replace({np.nan: -702313053})
                sheet[col] = sheet[col].astype(int)
                if empty_to_default_value:
                    sheet[col] = sheet[col].replace({-702313053: val})
                else:
                    sheet[col] = sheet[col].replace({-702313053: None})
            # Convert to other types
            else:
                sheet[col] = sheet[col].astype(type(val))
        except Exception:
            err = 'Column \"{}\" contains values '.format(col)
            err += 'that could not be read as {} values'.format(type(val))
            return False, err
    # Replace remaining empty data with None
    sheet.replace({np.nan: None}, inplace=True)
    return True, ''
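
# A minimal sketch (made-up frame) of the casting behaviour: string columns go
# through an intermediate 'None' marker, so empty cells end up as None by
# default, or as the given default value when empty_to_default_value=True:
#
#   >>> df = pd.DataFrame({'name': ['n1', np.nan], 'level': [1.0, np.nan]})
#   >>> _castColumnType(df, {'name': '', 'level': 0.0})
#   (True, '')
#   >>> list(df['name']), list(df['level'])
#   (['n1', None], [1.0, None])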


def _pd_sorted_col(
    dft: pd.DataFrame,
    lico: list
):
    """
    Sort the columns of a dataframe according to a columns list.

    Parameters
    ----------
    :param dft: Input dataframe to sort.
    :type dft: pandas.DataFrame

    :param lico: Ordered list of columns to have.
    :type lico: list

    Returns
    -------
    :return: Sorted dataframe.
    :rtype: pandas.DataFrame
    """
    li_df = list(dft)
    if li_df != lico:
        dftm = pd.DataFrame(columns=lico)
        for col in lico:
            dftm[col] = dft[col]
        return dftm
    # Columns are already in the expected order
    return dft
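
# A minimal sketch (made-up frame) of the reordering:
#
#   >>> df = pd.DataFrame({'b': [1], 'a': [2]})
#   >>> list(_pd_sorted_col(df, ['a', 'b']).columns)
#   ['a', 'b']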


def _extractTablesFromSheet(
    sheet: pd.DataFrame,
    new_sheets: list,
    default_columns_names=None
):
    """
    Extract all tables from an excel sheet.

    Ex: Extract tables from a sheet like this

    +----+----+----+----+----+
    | -  | -  | -  | -  | -  |
    +----+----+----+----+----+
    | -  | -  | C1 | C2 | C3 |
    +----+----+----+----+----+
    | -  | R1 | x  | -  | x  |
    +----+----+----+----+----+
    | -  | R2 | x  | x  | -  |
    +----+----+----+----+----+
    | -  | -  | -  | -  | -  |
    +----+----+----+----+----+
    | -  | -  | C4 | C5 | C6 |
    +----+----+----+----+----+
    | -  | R3 | -  | x  | x  |
    +----+----+----+----+----+
    | -  | R4 | x  | -  | -  |
    +----+----+----+----+----+

    Or like this

    +----+----+----+----+----+
    | -  | -  | -  | -  | -  |
    +----+----+----+----+----+
    | -  | -  | C1 | C2 | C3 |
    +----+----+----+----+----+
    | -  | R1 | x  | -  | x  |
    +----+----+----+----+----+
    | -  | R2 | x  | x  | -  |
    +----+----+----+----+----+
    | -  | -  | -  | -  | -  |
    +----+----+----+----+----+
    | -  | R3 | -  | x  | x  |
    +----+----+----+----+----+
    | -  | R4 | x  | -  | -  |
    +----+----+----+----+----+

    Parameters
    ----------
    :param sheet: Sheet to parse.
    :type sheet: pd.DataFrame

    :param new_sheets: List of tables extracted from the sheet.
    :type new_sheets: list(pd.DataFrame), modified

    :param default_columns_names: Column names to use when a table has none (set on recursive calls).
    :type default_columns_names: list, optional (default=None)

    Returns
    -------
    :return: True if all tables could be extracted, False otherwise.
    :rtype: bool
    """
    # Nothing to do
    if sheet.empty:
        return True
    # If we don't have any default column name -> read column index
    # -> Useful if the first row is composed of node names
    # -> Need to get rid of 'Unnamed' columns
    # -> Then, if nodes are mentioned in more than one column, pandas appends '.x'
    #    (x a number) to the node name, so we need to get rid of that too...
    if default_columns_names is None:
        default_columns_names = []
        for _ in sheet.columns:
            if isinstance(_, str):
                if (re.fullmatch('Unnamed:.*', _) is None):
                    end_ = re.search('([.][0-9]+)\Z', _)  # noqa: W605
                    if end_ is not None:
                        default_columns_names.append(_[:-len(end_[0])])
                    else:
                        default_columns_names.append(_)
    # Need to reindex the sheet to correctly use enumerated index and columns
    sheet = sheet.reset_index(drop=True)
    sheet = sheet.T.reset_index(drop=True).T
    # ----------------- Initialize starting and ending points
    start_row = 0
    start_col = 0
    index_col = 0  # Column number for index names
    end_row = sheet.shape[0]
    end_col = sheet.shape[1]
    # ---------------- Find starting point
    found_starting_point = False
    for row in range(sheet.shape[0]):
        for col in range(sheet.shape[1]):
            # Check if current val is NaN (empty cell)
            val = sheet.iat[row, col]
            is_nan = (val != val)
            # If not -> Bingo
            found_starting_point = (not is_nan)
            if found_starting_point:
                start_row = row
                start_col = col
                index_col = col
                break
        if found_starting_point:
            break
    # ------------ Check table format with upper left corner
    upper_left_corner = sheet.iloc[start_row:min(start_row+2, end_row), start_col:min(start_col+2, end_col)]
    # Not enough data in the given sheet -> stop?
    if (upper_left_corner.shape[0] < 2):
        # Modify starting row to avoid missing a table with only one line
        start_row = max(0, start_row-1)
        upper_left_corner = sheet.iloc[start_row:min(start_row+2, end_row), start_col:min(start_col+2, end_col)]
    if (upper_left_corner.shape[1] < 2):
        # Modify starting col to avoid missing a table with only one col
        start_col = max(0, start_col-1)
        index_col = start_col
        upper_left_corner = sheet.iloc[start_row:min(start_row+2, end_row), start_col:min(start_col+2, end_col)]
    if (upper_left_corner.shape[0] < 2) or (upper_left_corner.shape[1] < 2):
        # OK, table does not contain any data
        return True
    # Is the upper left corner an isolated value?
    v1 = upper_left_corner.iloc[0, 1]
    v2 = upper_left_corner.iloc[1, 0]
    if (v1 != v1) and (v2 != v2):
        # Retry but without the isolated value
        sheet_copy = sheet.copy()  # copy to be sure that we don't modify the original sheet
        sheet_copy.iloc[start_row, start_col] = np.nan
        return _extractTablesFromSheet(sheet_copy, new_sheets, default_columns_names=default_columns_names)
    # Is the first row an overhead?
    if (not _isValueAcceptedInMatrixTable(upper_left_corner.iloc[1, 1])):
        # Retry but without that row
        sheet_copy = sheet.copy()  # copy to be sure that we don't modify the original sheet
        sheet_copy.iloc[start_row, start_col:end_col] = np.nan
        return _extractTablesFromSheet(sheet_copy, new_sheets, default_columns_names=default_columns_names)
    # Check if the content of the first row = column names
    columns_names = None
    # Check what the upper left corner of the table contains.
    # In all cases: 'val' can be 'x', 'X' or some stringified float value.
    # Case 1 : upper left corner = ['R1', 'val' / NaN]
    #          ...                 ['R2', 'val' / NaN]
    #          -> 'val' and NaN can be turned into float.
    # Case 2 : upper left corner = ['C1'        , 'C2'       ]
    #          ...                 ['val' / NaN , 'val' / NaN]
    #          -> On the first row, column names can not be turned into float
    #          -> On the first col, 'val' and NaN can be turned into float
    # Case 3 : upper left corner = ['table name', 'C1'       ]
    #          ...                 ['R1'        , 'val' / NaN]
    #          -> On the first row, neither table name nor column names can be turned into float
    #          -> On the first col, no row name can be turned into float
    if _isValueAcceptedInMatrixTable(upper_left_corner.iloc[0, 1]):
        case = 1
    else:
        if _isValueAcceptedInMatrixTable(upper_left_corner.iloc[1, 0]):
            case = 2
        else:
            case = 3
    # Check in which case we are
    if (case == 1):
        # Case 1 -> need to use default columns names
        columns_names = default_columns_names
        # Start col is one col on the right, because the first col is index names
        start_col = min(start_col+1, end_col)
        # Ending col is easy to find
        end_col = min(start_col + len(columns_names), end_col)
    if (case == 2):
        # Case 2 -> There are column names on the first row
        columns_names = sheet.astype('str').iloc[start_row, start_col:].to_list()
        # Start row is one row below & index col is one col before
        start_row = min(start_row+1, end_row)
        index_col = max(0, index_col-1)
    if (case == 3):
        # Case 3 -> There are column names on the first row, but starting one col on the right
        columns_names = sheet.astype('str').iloc[start_row, (start_col+1):].to_list()
        # Start row is one row below & index col does not change, and start col is one col on the right
        start_row = min(start_row+1, end_row)
        start_col = min(start_col+1, end_col)
    if (case == 2) or (case == 3):
        # Cases 2 & 3 : Find ending col
        for (i, col_name) in enumerate(columns_names):
            # Check if current col name is NaN (empty cell)
            is_nan = (col_name != col_name)
            # If nan -> Bingo
            if is_nan:
                end_col = min(start_col + i, end_col)
                columns_names = columns_names[:i]
                break
    # No default column name was provided -> Error
    if columns_names is None:
        return False
    # ------------ Check what the first col contains
    index_names = sheet.iloc[start_row:end_row, index_col].to_list()
    # ------------- Find ending row
    for (i, index_name) in enumerate(index_names):
        # Check if current val is NaN (empty cell)
        is_nan = (index_name != index_name)
        # If nan -> Bingo
        if is_nan:
            end_row = min(i + start_row, end_row)
            index_names = index_names[:i]
            break
    # New table
    new_table = sheet.iloc[start_row:end_row, start_col:end_col]
    if len(new_table.columns) != len(columns_names):
        su_trace.logger.error('Could not read ter table')
        return False
    new_table.columns = [_.strip() if (type(_) is str) else _ for _ in columns_names]
    new_table.index = [_.strip() if (type(_) is str) else _ for _ in index_names]
    new_sheets.append(new_table)
    # Find other tables if needed
    ok = True
    ok &= _extractTablesFromSheet(
        sheet.iloc[:, end_col:], new_sheets,
        default_columns_names=columns_names)  # Upper right missing part of sheet
    ok &= _extractTablesFromSheet(
        sheet.iloc[end_row:, :], new_sheets,
        default_columns_names=columns_names)  # Lower missing part of sheet
    # TODO: review how the remaining parts of the sheet are split in the recursion
    return ok
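
# A minimal usage sketch, mirroring how the TER / IO readers below call the
# extractor ('input.xlsx' and its sheet name are made up for illustration):
#
#   >>> raw_sheet = pd.read_excel('input.xlsx', 'ter')
#   >>> tables = []
#   >>> ok = _extractTablesFromSheet(raw_sheet, tables)
#   >>> # For a TER sheet, `tables` should then hold the supplies and uses tables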


def _isValueAcceptedInMatrixTable(value):
    """
    In a matrix table, accepted values are NaN, numbers, and 'x' or 'X'.

    Parameters
    ----------
    :param value: Value to test.
    :type value: any

    Returns
    -------
    :return: True if the value is accepted, False otherwise.
    :rtype: boolean
    """
    # First check if value is a number or NaN,
    # by trying to convert it to float
    try:
        float(value)
        return True
    except ValueError:
        # If it fails, then it's not NaN or a number,
        # but it can still be either 'x' or 'X'
        OK_but_not_a_number = '[xX]'
        return (re.fullmatch(OK_but_not_a_number, str(value)) is not None)


def _hasDuplicatedEntry(entries: list):
    """
    Check if a list contains duplicated entries.

    Parameters
    ----------
    :param entries: List of entries to check.
    :type entries: list

    Returns
    -------
    :return: (True if duplicates were found ; dict of duplicated entries with their positions)
    :rtype: (bool, dict)
    """
    duplicates = {}
    for (i, entry) in enumerate(entries):
        if entries.count(entry) > 1:
            if entry not in duplicates.keys():
                duplicates[entry] = []
            duplicates[entry].append(i)
    return (len(duplicates) > 0), duplicates
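
# A minimal sketch (made-up list):
#
#   >>> _hasDuplicatedEntry(['a', 'b', 'a'])
#   (True, {'a': [0, 2]})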


def _fuseDuplicatedColumns(table: pd.DataFrame, dup_cols: dict):
    # Get current columns names
    new_columns_names = table.columns.to_list()
    new_tables = {}
    # For each duplicated column, get the column name and the positions of the duplicates
    for (col_name, cols_index) in dup_cols.items():
        # Fuse columns (keep, for each row, the value from the first duplicated column)
        new_tables[col_name] = table.loc[:, col_name].apply(lambda row: row.values[0], axis=1)
        # Rename duplicated columns, except the first one
        for (i, col_index) in enumerate(cols_index):
            if i == 0:
                continue
            new_columns_names[col_index] = col_name+'_dup'
    # Set new columns names
    table.columns = new_columns_names
    # Drop and replace
    for (col_name, sub_table) in new_tables.items():
        # Drop the renamed columns (except the first one)
        table.drop(columns=(col_name+'_dup'), inplace=True)
        # Apply the fused data on the remaining column
        table[col_name] = sub_table


def _fuseDuplicatedRows(table: pd.DataFrame, dup_rows: dict):
    # Get current index names
    new_index_names = table.index.to_list()
    new_tables = {}
    # For each duplicated row, get the row name and the positions of the duplicates
    for (row_name, rows_index) in dup_rows.items():
        # Fuse rows (keep, for each column, the value from the first duplicated row)
        new_tables[row_name] = table.loc[row_name, :].apply(lambda col: col.values[0], axis=0)
        # Rename duplicated rows, except the first one
        for (i, row_index) in enumerate(rows_index):
            if i == 0:
                continue
            new_index_names[row_index] = row_name+'_dup'
    # Set new index names
    table.index = new_index_names
    # Drop and replace
    for (row_name, sub_table) in new_tables.items():
        # Drop the renamed rows (except the first one)
        table.drop(index=(row_name+'_dup'), inplace=True)
        # Apply the fused data on the remaining row
        table.loc[row_name, :] = sub_table
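
# A minimal sketch (made-up frame) of the column fusion; row fusion works the
# same way on the index. The value kept is the one from the first duplicate:
#
#   >>> df = pd.DataFrame([[1, 9], [2, 8]], columns=['a', 'a'])
#   >>> _fuseDuplicatedColumns(df, {'a': [0, 1]})
#   >>> list(df.columns), list(df['a'])
#   (['a'], [1, 2])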


# PUBLIC FUNCTIONS ----------------------------------------------------------------
def consistantSheetName(
    usr_sheet_name: str,
    sankey: Sankey
):
    '''
    Test if usr_sheet_name is consistent with the allowed sheets list.

    Parameters
    ----------
    usr_sheet_name : string
        Sheet name to check.

    Returns
    -------
    string
        Result is an empty string if the tested sheet is not consistent.
        Result is the dictionary key corresponding to the allowed list found.

    Notes
    -----
    - If the usr_sheet_name input is empty ('') the result is a list of
      allowed sheet names as a string.
    - A particular case is taken into account for proxy input files, which
      usually have 3 proxy sheets (one of them with the 'sector' keyword in its name).
    '''
    _, res = _consistantSheetName(usr_sheet_name, sankey.xl_user_converter)
    return res


def consistantColName(
    sheet_name: str,
    prop_col: str,
    sankey: Sankey,
    tags: list = []
):
    '''
    Test if prop_col is consistent with the allowed columns list.

    Parameters
    ----------
    :param sheet_name: Sheet name to check.
    :type sheet_name: string

    :param prop_col: Column to find.
    :type prop_col: string

    :param sankey: Sankey struct whose name converter will be updated.
    :type sankey: Sankey, modified

    :param tags: Tags list to check.
    :type tags: list

    Returns
    -------
    :return:
        If the column corresponds to an entry in the sheet name dictionary, the result is the corresponding key.
        If the column is a tag column / an additional column, the result is the standard format of the column name.
    :rtype: string
    '''
    _, res = _consistantColName(
        sheet_name,
        prop_col,
        sankey.xl_user_converter,
        tags)
    return res


def load_sankey_from_excel_file(
    input_file: str,
    sankey: Sankey,
    do_coherence_checks: bool = False,
    sheet_to_remove_names: list = None,
):
    '''
    Main converter routine. Calls the dedicated routine depending on input type.
    Uses the global variable 'su_trace' to trace the file processing.

    Parameters
    ----------
    :param input_file: Input file name to load (with extension and path).
    :type input_file: string

    :param sankey: Data struct as a Sankey object.
    :type sankey: Sankey, modified

    :param do_coherence_checks: Do we trigger coherence checks on the sankey structure?
    :type do_coherence_checks: bool

    :param sheet_to_remove_names: List of sheets that will be rewritten or removed when re-exported as excel.
    :type sheet_to_remove_names: list, modified, optional (default=None)

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    '''
    # Read excel input
    excel_file = pd.ExcelFile(input_file)
    # If everything went fine, get sheet names
    excel_sheet_names = excel_file.sheet_names
    # Keep only consistent sheets
    necessary_sheet_names = {}
    unconsistant_sheet_names = []
    use_sheet_to_remove_names = True
    if type(sheet_to_remove_names) is not list:
        use_sheet_to_remove_names = False
    for sheet_name in excel_sheet_names:
        # Get the sheet reference name for the given sheet name
        is_sheet_consistant, sheet_refkey = _consistantSheetName(sheet_name, sankey.xl_user_converter)
        if is_sheet_consistant:  # Got the reference name
            if sheet_refkey not in necessary_sheet_names:
                necessary_sheet_names[sheet_refkey] = [sheet_name]
            else:
                necessary_sheet_names[sheet_refkey].append(sheet_name)
        else:  # No reference name found
            unconsistant_sheet_names.append(sheet_name)
    # Check if we got some sheets to process
    if len(necessary_sheet_names.keys()) == 0:
        err_msg = "We didn't find any sheet name as specified in the following table : \n"
        err_msg += _allowedSheetNames()
        return False, err_msg
    # Debug log
    su_trace.logger.debug('Names of excel sheets that will be processed : ')
    [su_trace.logger.debug('- {}'.format(_)) for _ in necessary_sheet_names.values()]
    if len(unconsistant_sheet_names) > 0:
        su_trace.logger.debug('Names of excel sheets that will be ignored : ')
        [su_trace.logger.debug('- {}'.format(_)) for _ in unconsistant_sheet_names]
    if use_sheet_to_remove_names:
        if len(sheet_to_remove_names) > 0:
            su_trace.logger.debug('Names of excel sheets that will be removed : ')
            [su_trace.logger.debug('- {}'.format(_)) for _ in sheet_to_remove_names]
    # Update struct
    return _read_sankey_from_excel_book(
        input_file,
        necessary_sheet_names,
        sankey,
        do_coherence_checks=do_coherence_checks)
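
# A minimal usage sketch of the top-level entry point ('input.xlsx' is a
# made-up file name, and a default-constructible Sankey() is assumed):
#
#   >>> sankey = Sankey()
#   >>> ok, msg = load_sankey_from_excel_file('input.xlsx', sankey,
#   ...                                       do_coherence_checks=True)
#   >>> if not ok:
#   ...     su_trace.logger.error(msg)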


def _read_sankey_from_excel_book(
    excel_file_name: str,
    sheet_names: dict,
    sankey: Sankey,
    do_coherence_checks: bool = False
):
    """
    Parse all sheets from an excel book to create a sankey struct.

    Parameters
    ----------
    :param excel_file_name: Name of the input excel file to read.
    :type excel_file_name: str

    :param sheet_names: Input file worksheet dict as [reference sheet name: user sheet names]
    :type sheet_names: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    :param do_coherence_checks: Do we trigger coherence checks on the sankey structure?
    :type do_coherence_checks: bool

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    """
    # TODO : useless but kept for now
    mfa_dict = {}
    # Verify that we have the minimum number of sheets
    ok, msg = check_sheets_before_reading(sheet_names)
    if not ok:
        return ok, msg
    # First create standardized node type tags if needed
    for _ in (CONST.PRODUCTS_SHEET, CONST.SECTORS_SHEET, CONST.EXCHANGES_SHEET):
        if _ in sheet_names.keys():
            sankey.get_or_create_tagg(
                CONST.NODE_TYPE,
                CONST.TAG_TYPE_NODE,
                ':'.join([
                    CONST.NODE_TYPE_PRODUCT,
                    CONST.NODE_TYPE_SECTOR,
                    CONST.NODE_TYPE_EXCHANGE]))
            break
    # Then check all other TAGS
    if CONST.TAG_SHEET in sheet_names.keys():
        # Read tags
        for tag_sheet_name in sheet_names[CONST.TAG_SHEET]:
            su_trace.logger.info('Reading sheet {}'.format(tag_sheet_name))
            ok, msg = xl_read_tags_sheet(pd.read_excel(excel_file_name, tag_sheet_name), sankey)
            if not ok:
                return ok, "Error on sheet {0} ({1}) : {2}".format(tag_sheet_name, CONST.TAG_SHEET, msg)
            # Log warning messages
            if len(msg) > 0:
                su_trace.logger.error('Warning(s) on sheet {0} ({1}) :'.format(tag_sheet_name, CONST.TAG_SHEET))
                for _ in msg.split('\n'):
                    if len(_) > 0:
                        su_trace.logger.error(' - {}'.format(_))
    # Then check nodes, but in this order
    options = {}
    options['warn_on_new_nodes'] = False
    options['warn_on_new_flux'] = False
    prev_mfa_entry_name = []
    sheets_processing_order = [
        (CONST.NODES_SHEET, xl_read_nodes_sheet, [CONST.NODES_SHEET, options, sankey]),
        (CONST.PRODUCTS_SHEET, xl_read_products_sectors_sheet, [CONST.PRODUCTS_SHEET, options, sankey]),
        (CONST.SECTORS_SHEET, xl_read_products_sectors_sheet, [CONST.SECTORS_SHEET, options, sankey]),
        (CONST.EXCHANGES_SHEET, xl_read_products_sectors_sheet, [CONST.EXCHANGES_SHEET, options, sankey]),
        (CONST.IO_SHEET, xl_read_input_output_sheet, [options, mfa_dict, sankey]),
        (CONST.TER_SHEET, xl_read_terbase_sheet, [options, mfa_dict, sankey]),
        (CONST.DATA_SHEET, xl_read_data_sheet, [options, sankey]),
        (CONST.IO_DATA_SHEET, xl_read_input_output_data_sheet, [options, mfa_dict, sankey]),
        (CONST.MIN_MAX_SHEET, xl_read_min_max_sheet, [options, sankey]),
        (CONST.CONSTRAINTS_SHEET, xl_read_constraints_sheet, [options, sankey]),
        (CONST.RESULTS_SHEET, xl_read_result_sheet, [sankey]),
        # (CONST.ANALYSIS_SHEET, xl_read_analysis_sheet, [mfa_dict, sankey]),
        (CONST.UNCERTAINTY_SHEET, xl_read_uncertainty_sheet, [mfa_dict, sankey]),
        (CONST.CONVERSIONS_SHEET, xl_read_conversions_sheet, [mfa_dict, sankey])
    ]
    # Process all sheets in the correct order if they exist
    for (std_sheet_name, extract_function, args) in sheets_processing_order:
        if std_sheet_name in sheet_names.keys():
            # Warn on new node creation
            if (not options['warn_on_new_nodes']) and (len(prev_mfa_entry_name) > 0):
                options['warn_on_new_nodes'] = \
                    (CONST.NODES_SHEET in prev_mfa_entry_name) or \
                    (CONST.IO_SHEET in prev_mfa_entry_name) or \
                    (CONST.TER_SHEET in prev_mfa_entry_name)
                options['warn_on_new_nodes'] |= \
                    (CONST.PRODUCTS_SHEET in prev_mfa_entry_name) and \
                    (CONST.SECTORS_SHEET in prev_mfa_entry_name) and \
                    (std_sheet_name != CONST.EXCHANGES_SHEET)
            # Warn on new flux creation
            if (not options['warn_on_new_flux']) and (len(prev_mfa_entry_name) > 0):
                options['warn_on_new_flux'] = \
                    (CONST.IO_SHEET in prev_mfa_entry_name) or \
                    (CONST.TER_SHEET in prev_mfa_entry_name) or \
                    (CONST.DATA_SHEET in prev_mfa_entry_name)
            # User sheet names
            for sheet_name in sheet_names[std_sheet_name]:
                # Extract sheet
                excel_sheet = pd.read_excel(excel_file_name, sheet_name)
                # If nothing inside -> continue
                nb_rows = excel_sheet.shape[0]
                if nb_rows < 1:
                    continue
                # Parse
                su_trace.logger.info('Reading sheet {}'.format(sheet_name))
                ok, msg = extract_function(excel_sheet, *args)
                if not ok:
                    return ok, "Error on sheet {0} ({1}) : {2}".format(sheet_name, std_sheet_name, msg)
                # Log warning messages
                if len(msg) > 0:
                    su_trace.logger.error('Warning(s) on sheet {0} ({1}) :'.format(sheet_name, std_sheet_name))
                    for _ in msg.split('\n'):
                        if len(_) > 0:
                            su_trace.logger.error(' - {}'.format(_))
            # Auto-compute missing flux
            if std_sheet_name in [CONST.IO_SHEET, CONST.TER_SHEET, CONST.DATA_SHEET, CONST.RESULTS_SHEET]:
                ok = sankey.autocompute_missing_flux()
                if not ok:
                    return False, ''
            # OK, sheet parsed
            prev_mfa_entry_name.append(std_sheet_name)
    # Synchronize all nodes levels
    sankey.autocompute_nodes_levels()
    # if sankey.has_at_least_one_mat_balance():
    # Compute mat balance
    sankey.autocompute_mat_balance()
    # else:
    #     # Recompute mat_balance only if it was specified for at least one node
    #     su_trace.logger.info('Matter balance was not specified in entry file, no computing.')

    # Overall coherence checks
    if do_coherence_checks:
        su_trace.logger.info('Overall coherence checks on Sankey structure')
        ok = sankey.check_overall_sankey_coherence()
        if not ok:
            return False, 'Sankey structure is not coherent. Abort.'
    # End
    return True, ''


def check_sheets_before_reading(sheet_names):
    """
    Verify that there are enough sheets for parsing.

    Parameters
    ----------
    :param sheet_names: Input file worksheet dict as [reference sheet name: user sheet names]
    :type sheet_names: dict

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    """
    # With a data sheet, there is enough data to structure the Sankey
    if CONST.DATA_SHEET in sheet_names.keys():
        return True, 'OK - Data sheet'
    # No data sheet -> Do we have a Node sheet?
    if CONST.NODES_SHEET in sheet_names.keys():
        return True, 'OK - Node sheet'
    # No Node sheet -> Do we have Product & Sector sheets?
    if (CONST.PRODUCTS_SHEET in sheet_names.keys()) and \
       (CONST.SECTORS_SHEET in sheet_names.keys()):
        return True, 'OK - Products & Sectors sheets'
    # No Product & Sector sheets -> Do we have an IO sheet?
    if (CONST.IO_SHEET in sheet_names.keys()):
        return True, 'OK - IO sheets'
    # No IO sheet -> Do we have a TER sheet?
    if CONST.TER_SHEET in sheet_names.keys():
        return True, 'OK - TER sheet'
    # Not enough sheets
    err_msg = "Not enough sheets. To create the Sankey, we need at least one of these sheets: \n"
    err_msg += _allowedSheetNames([CONST.DATA_SHEET, CONST.NODES_SHEET, CONST.IO_SHEET, CONST.TER_SHEET])
    err_msg += "Or all these sheets instead : \n"
    err_msg += _allowedSheetNames([CONST.PRODUCTS_SHEET, CONST.SECTORS_SHEET])
    return False, err_msg
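
# A minimal sketch of the fallback logic, assuming the keys are the
# standardized names from io_excel_constants:
#
#   >>> check_sheets_before_reading({CONST.DATA_SHEET: ['my data sheet']})
#   (True, 'OK - Data sheet')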


def xl_read_tags_sheet(
    tags_sheet: dict,
    sankey: Sankey
):
    '''
    Read the tags sheet.

    Parameters
    ----------
    :param tags_sheet: Excel sheet to read.
    :type tags_sheet: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    '''
    # Keep only the first columns. Clean the remaining empty right columns.
    for i, col in enumerate(tags_sheet.columns):  # iterate on columns names
        if 'Unnamed' in col:
            tags_sheet.drop(tags_sheet.columns[i:], inplace=True, axis=1)
            break
    # Standardize the column names according to the dictionary if they belong
    # to it, otherwise also look them up in the nodeTags
    tags_sheet.columns = list(map(lambda x: consistantColName(CONST.TAG_SHEET, x, sankey), tags_sheet.columns))
    # Waiting for these columns
    # Obligatory columns to have in the tags sheet, with their default type
    oblig_columns = {CONST.TAG_NAME: '', CONST.TAG_TYPE: '', CONST.TAG_TAGS: ''}
    # Check if we have at least the obligatory columns
    ok, err_msg = _checkNeededColumns(tags_sheet.columns, oblig_columns.keys(), CONST.TAG_SHEET)
    if not ok:
        return ok, err_msg
    # Facultative columns we can have, with their default value
    facul_columns = {CONST.TAG_IS_PALETTE: 0, CONST.TAG_COLORMAP: '', CONST.TAG_COLOR: ''}
    # Check if we need to add facultative columns
    for facul_column_name in facul_columns.keys():
        if facul_column_name not in tags_sheet.columns:
            tags_sheet[facul_column_name] = facul_columns[facul_column_name]
    # Convert data to specific types
    ok, msg = _castColumnType(
        tags_sheet, dict(oblig_columns, **facul_columns),
        empty_to_default_value=True)
    if not ok:
        return ok, msg
    # Update Sankey
    return sankey.update_from_tags_table(tags_sheet)


def xl_read_data_sheet(
    data_sheet: pd.DataFrame,
    options: dict,
    sankey: Sankey
):
    '''
    Read the data sheet.

    Parameters
    ----------
    :param data_sheet: Excel sheet to read.
    :type data_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success code; Error message)
    :rtype: (int; string)
    '''
    # Set column headers consistent with the specified column names for the data sheet
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.DATA_SHEET, x, sankey, tags),
            data_sheet.columns))
    # Waiting for these columns
    # Obligatory columns to have in the data sheet, with their default type
    oblig_columns = {
        CONST.DATA_ORIGIN: '',
        CONST.DATA_DESTINATION: '',
    }
    # Check if we have the mandatory columns (origin, destination, values)
    ok, msg = _checkNeededColumns(new_columns_names, list(oblig_columns.keys()), CONST.DATA_SHEET)
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    data_sheet.columns = new_columns_names
    # Facultative columns we can have, with their default value
    facul_columns = {
        CONST.DATA_VALUE: 0.,
        CONST.DATA_QUANTITY: 0.0,
        CONST.DATA_FACTOR: 0.0,
        CONST.DATA_UNCERT: 0.0}
    # Convert columns data to the default data type, or None if NaN
    ok, msg = _castColumnType(
        data_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update Sankey
    return sankey.update_from_data_table(
        data_sheet,
        options['warn_on_new_nodes'],
        options['warn_on_new_flux'])


def xl_read_nodes_sheet(
    nodes_sheet: dict,
    mfa_entry_name: str,
    options: dict,
    sankey: Sankey
):
    """
    Read the nodes sheet.

    Parameters
    ----------
    :param nodes_sheet: Excel sheet to read (dataframe).
    :type nodes_sheet: dict

    :param mfa_entry_name: Type of sheet to parse.
    :type mfa_entry_name: str

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    """
    # Standardize the column names according to the dictionary,
    # otherwise keep the column names as they are
    tags = list(sankey.taggs[CONST.TAG_TYPE_NODE].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_LEVEL].keys())
    nodes_sheet.columns = list(
        map(lambda x: consistantColName(mfa_entry_name, x, sankey, tags),
            nodes_sheet.columns))
    # Waiting for these columns
    # Obligatory columns to have in the nodes sheet, with their default type
    oblig_columns = {
        CONST.NODES_LEVEL: 0,
        CONST.NODES_NODE: ''}
    # Check if we have at least the obligatory columns
    ok, msg = _checkNeededColumns(nodes_sheet.columns, list(oblig_columns.keys()), mfa_entry_name)
    if not ok:
        return ok, msg
    # Facultative columns we can have, with their default value
    facul_columns = {
        CONST.NODES_MAT_BALANCE: 1,
        CONST.NODES_SANKEY: 1,
        CONST.NODES_COLOR: '',
        CONST.NODES_DEFINITIONS: ''}
    # Convert to int, str, or None if NaN
    ok, msg = _castColumnType(
        nodes_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update Sankey
    return sankey.update_from_nodes_table(
        nodes_sheet,
        warn_on_new_nodes=options['warn_on_new_nodes'])


def xl_read_products_sectors_sheet(
    excel_sheet: dict,
    mfa_entry_name: str,
    options: dict,
    sankey: Sankey
):
    """
    Read either a Products, Sectors or Exchanges sheet.

    Parameters
    ----------
    :param excel_sheet: Excel sheet to read (dataframe).
    :type excel_sheet: dict

    :param mfa_entry_name: Type of sheet to parse.
    :type mfa_entry_name: str

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    """
    # Add tag column
    if mfa_entry_name == CONST.PRODUCTS_SHEET:
        excel_sheet[CONST.NODE_TYPE] = CONST.NODE_TYPE_PRODUCT
    elif mfa_entry_name == CONST.SECTORS_SHEET:
        excel_sheet[CONST.NODE_TYPE] = CONST.NODE_TYPE_SECTOR
    elif mfa_entry_name == CONST.EXCHANGES_SHEET:
        excel_sheet[CONST.NODE_TYPE] = CONST.NODE_TYPE_EXCHANGE
    # Read as nodes
    return xl_read_nodes_sheet(
        excel_sheet,
        mfa_entry_name,
        options,
        sankey)


def xl_read_terbase_sheet(
    ter_excel_sheet: dict,
    options: dict,
    mfa_dict: dict,
    sankey: Sankey
):
    """
    Read the TER sheet.

    Parameters
    ----------
    :param ter_excel_sheet: Excel sheet to read (dataframe).
    :type ter_excel_sheet: dict

    :param options: Dictionary of parsing options.
    :type options: dict

    :param mfa_dict: Data struct for Sankey.
    :type mfa_dict: dict, modified

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    """
    # Extract all tables from sheet
    tables = []
    _extractTablesFromSheet(ter_excel_sheet, tables)
    if len(tables) != 2:
        err_msg = 'Could not find or extract the necessary two tables, found {}.\n'.format(len(tables))
        err_msg += 'Are all the tables here and correctly formatted ?'
        return False, err_msg
    # Do we have duplicated cols or rows?
    for i, table in enumerate(tables):
        has_dup_cols, dup_cols = _hasDuplicatedEntry(table.columns.to_list())
        if has_dup_cols:
            _fuseDuplicatedColumns(table, dup_cols)
        has_dup_rows, dup_rows = _hasDuplicatedEntry(table.index.to_list())
        if has_dup_rows:
            _fuseDuplicatedRows(table, dup_rows)
    # Do we have the same columns and rows for each table?
    has_missing_entry = False
    msg = ""
    sets_headers = [(set(table.index.to_list()), set(table.columns.to_list())) for table in tables]
    for i in range(len(sets_headers) - 1):
        diff_rows = sets_headers[i][0] - sets_headers[i+1][0]
        if len(diff_rows) > 0:
            has_missing_entry = True
            msg += 'Tables {0} and {1} have incompatible rows : {2}\n'.format(
                i, i+1, list(diff_rows))
        diff_cols = sets_headers[i][1] - sets_headers[i+1][1]
        if len(diff_cols) > 0:
            has_missing_entry = True
            msg += 'Tables {0} and {1} have incompatible columns : {2}\n'.format(
                i, i+1, list(diff_cols))
    if has_missing_entry:
        return False, msg
    # Separate tables
    table_supplies = tables[0]  # Defines flux Sectors->Products, with Cols=Sectors, Rows=Products
    table_uses = tables[1]  # Defines flux Products->Sectors, with Cols=Sectors, Rows=Products
    # In Sankey struct
    log = ''
    ok, msg = sankey.update_from_matrix_table(
        table_supplies.T.replace({np.nan: None}),
        warn_on_new_nodes=options['warn_on_new_nodes'],
        warn_on_new_flux=options['warn_on_new_flux'],
        tagg_name='Type de noeud',
        tagg_type=CONST.TAG_TYPE_NODE,
        tag_name_col=CONST.NODE_TYPE_PRODUCT,
        tag_name_row=CONST.NODE_TYPE_SECTOR)
    if not ok:
        err = 'Could not process supplies table : {}'.format(msg)
        return ok, err
    log += msg
    ok, msg = sankey.update_from_matrix_table(
        table_uses.replace({np.nan: None}),
        warn_on_new_nodes=options['warn_on_new_nodes'],
        warn_on_new_flux=options['warn_on_new_flux'],
        tagg_name='Type de noeud',
        tagg_type=CONST.TAG_TYPE_NODE,
        tag_name_col=CONST.NODE_TYPE_SECTOR,
        tag_name_row=CONST.NODE_TYPE_PRODUCT)
    log += msg
    if not ok:
        err = 'Could not process use table : {}'.format(msg)
        return ok, err
    # Set MFA dict - Needed for retrocompatibility
    # Set 'x' and 'X' as 1
    table_uses.replace({'x': 1}, inplace=True)
    table_uses.replace({'X': 1}, inplace=True)
    table_supplies.replace({'x': 1}, inplace=True)
    table_supplies.replace({'X': 1}, inplace=True)
    # Default type = int
    _castColumnType(table_uses, 0, empty_to_default_value=True)
    _castColumnType(table_supplies, 0, empty_to_default_value=True)
    # Save in MFA dict
    mfa_dict[CONST.TER_SHEET] = {}
    mfa_dict[CONST.TER_SHEET]['use'] = table_uses
    mfa_dict[CONST.TER_SHEET]['supply'] = table_supplies
    return True, log


def xl_read_input_output_sheet(
    io_excel_sheet: dict,
    options: dict,
    mfa_input: dict,
    sankey: Sankey,
    read_data_in_matrix=False
):
    """
    Read the IO sheet.

    Parameters
    ----------
    :param io_excel_sheet: Excel sheet to read (dataframe).
    :type io_excel_sheet: dict

    :param options: Dictionary of parsing options.
    :type options: dict

    :param mfa_input: Data struct for Sankey.
    :type mfa_input: dict, modified

    :param sankey: Sankey struct constructed from input.
    :type sankey: Sankey, modified

    :param read_data_in_matrix: If True, read the matrix cells as flux data values.
    :type read_data_in_matrix: bool, optional (default=False)

    Returns
    -------
    :return: (Success ; Error message)
    :rtype: (bool; string)
    """
    # Extract all tables from sheet
    tables = []
    _extractTablesFromSheet(io_excel_sheet, tables)
    if len(tables) != 1:
        err_msg = 'Did not find the correct number of tables. Need one table, found {}.'.format(len(tables))
        if len(tables) == 0:
            err_msg += '\nIs the table in the given sheet or correctly formatted ?'
        return False, err_msg
    io_sheet = tables[0]
    # Do we have duplicated cols or rows?
    has_dup_cols, dup_cols = _hasDuplicatedEntry(io_sheet.columns.to_list())
    if has_dup_cols:
        _fuseDuplicatedColumns(io_sheet, dup_cols)
    has_dup_rows, dup_rows = _hasDuplicatedEntry(io_sheet.index.to_list())
    if has_dup_rows:
        _fuseDuplicatedRows(io_sheet, dup_rows)
    # In Sankey struct
    ok, msg = sankey.update_from_matrix_table(
        io_sheet.replace({np.nan: None}),
        data_in_matrix=read_data_in_matrix,
        warn_on_new_nodes=options['warn_on_new_nodes'],
        warn_on_new_flux=options['warn_on_new_flux'])
    # Update MFA data dict - Needed for retrocompatibility
    # Set 'x' and 'X' as 1
    io_sheet.replace({'x': 1}, inplace=True)
    io_sheet.replace({'X': 1}, inplace=True)
    # Default type = int
    _castColumnType(io_sheet, 0, empty_to_default_value=False)
    # Save in MFA dict
    mfa_input[CONST.IO_SHEET] = io_sheet
    # Output
    return ok, msg
|
1351
|
+
|
1352
|
+
|
1353
|
+
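# A minimal sketch of what fusing duplicated headers can look like (the real
# semantics live in the private helpers _hasDuplicatedEntry and
# _fuseDuplicatedColumns; summing below is only one plausible merge rule):
#
#   import pandas as pd
#   df = pd.DataFrame([[1, 2]], columns=['flux', 'flux'])
#   fused = df.T.groupby(level=0).sum().T  # a single 'flux' column holding 3

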
def xl_read_input_output_data_sheet(
    io_excel_sheet: dict,
    options: dict,
    mfa_input: dict,
    sankey: Sankey
):
    """
    Read IO sheet, with flux data stored inside the matrix.

    Parameters
    ----------
    :param io_excel_sheet: Excel sheet to read (dataframe)
    :type io_excel_sheet: dict

    :param options: Dictionary of parsing options.
    :type options: dict

    :param mfa_input: Data struct for Sankey
    :type mfa_input: dict, modified

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    return xl_read_input_output_sheet(
        io_excel_sheet,
        options,
        mfa_input,
        sankey,
        read_data_in_matrix=True)


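# Usage sketch (illustrative): both readers share the same implementation; the
# *_data_* variant merely tells the parser that matrix cells hold flux values
# rather than existence markers ('x' / 'X'). Assuming a loaded sheet `sheet`
# and a Sankey instance `sankey`:
#
#   options = {'warn_on_new_nodes': True, 'warn_on_new_flux': True}
#   mfa_input = {}
#   ok, msg = xl_read_input_output_data_sheet(sheet, options, mfa_input, sankey)

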
def xl_read_min_max_sheet(
    min_max_sheet: pd.DataFrame,
    options: dict,
    sankey: Sankey
):
    """
    Read CONST.MIN_MAX_SHEET.

    Parameters
    ----------
    :param min_max_sheet: Excel sheet to read
    :type min_max_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    # Make column headers consistent with tag groups
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.MIN_MAX_SHEET, x, sankey, tags),
            min_max_sheet.columns))
    # Expected columns
    # Mandatory columns for this sheet, with their default value
    oblig_columns = {
        CONST.MIN_MAX_ORIGIN: '',
        CONST.MIN_MAX_DESTINATION: ''}
    # All columns are here?
    ok, msg = _checkNeededColumns(new_columns_names, list(oblig_columns.keys()), CONST.MIN_MAX_SHEET)
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    min_max_sheet.columns = new_columns_names
    # Optional columns we can have, with their default value
    facul_columns = {}
    for tag in tags:
        facul_columns[tag] = ''
    # Convert to int, str, or None if NaN
    ok, msg = _castColumnType(
        min_max_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update sankey struct
    ok, msg = sankey.update_from_min_max_table(
        min_max_sheet,
        options['warn_on_new_nodes'],
        options['warn_on_new_flux'])
    if not ok:
        return ok, msg
    return True, ''


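# Usage sketch (illustrative; 'Origine'/'Destination' are hypothetical raw
# headers that consistantColName maps to CONST.MIN_MAX_ORIGIN and
# CONST.MIN_MAX_DESTINATION), reusing the options/sankey objects from above:
#
#   import pandas as pd
#   min_max_df = pd.DataFrame({
#       'Origine': ['Node A'],
#       'Destination': ['Node B']})
#   ok, msg = xl_read_min_max_sheet(min_max_df, options, sankey)

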
def xl_read_constraints_sheet(
    constraints_sheet: pd.DataFrame,
    options: dict,
    sankey: Sankey
):
    """
    Read CONST.CONSTRAINTS_SHEET.

    Parameters
    ----------
    :param constraints_sheet: Excel sheet to read
    :type constraints_sheet: pd.DataFrame

    :param options: Dictionary of parsing options.
    :type options: dict

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    # Make column headers consistent with tag groups
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.CONSTRAINTS_SHEET, x, sankey, tags),
            constraints_sheet.columns))
    # Expected columns
    # Mandatory columns for this sheet, with their default value
    oblig_columns = {
        CONST.CONSTRAINT_ID: '',
        CONST.CONSTRAINT_ORIGIN: '',
        CONST.CONSTRAINT_DESTINATION: ''}
    onlyone_columns = {
        CONST.CONSTRAINT_EQ: 0.0,
        CONST.CONSTRAINT_INEQ_INF: 0.0,
        CONST.CONSTRAINT_INEQ_SUP: 0.0}
    # All columns are here?
    ok, msg = _checkNeededColumns(
        new_columns_names,
        list(oblig_columns.keys()),
        CONST.CONSTRAINTS_SHEET,
        list(onlyone_columns.keys()))
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    constraints_sheet.columns = new_columns_names
    # Optional columns we can have, with their default value
    facul_columns = {}
    for tag in tags:
        facul_columns[tag] = ''
    # Convert columns data to default data type, or None if NaN
    ok, msg = _castColumnType(
        constraints_sheet, dict(oblig_columns, **onlyone_columns, **facul_columns))
    if not ok:
        return ok, msg
    ok, msg = sankey.update_from_constraints_table(
        constraints_sheet,
        options['warn_on_new_nodes'],
        options['warn_on_new_flux'])
    if not ok:
        return ok, msg
    return True, ''


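# Usage sketch (illustrative, hypothetical headers): each constraint row links
# an origin/destination pair to one of the eq / ineq_inf / ineq_sup value
# columns checked via onlyone_columns above:
#
#   import pandas as pd
#   constraints_df = pd.DataFrame({
#       'Id': ['c1'],
#       'Origine': ['Node A'],
#       'Destination': ['Node B'],
#       'Eq': [100.0]})
#   ok, msg = xl_read_constraints_sheet(constraints_df, options, sankey)

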
def xl_read_result_sheet(
    result_sheet: pd.DataFrame,
    sankey: Sankey
):
    """
    Read result sheet.

    Parameters
    ----------
    :param result_sheet: Excel sheet to read
    :type result_sheet: pd.DataFrame

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    # Make column headers consistent with the columns specified for the data sheet
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.RESULTS_SHEET, x, sankey, tags),
            result_sheet.columns))
    # Expected columns
    # Mandatory columns for this sheet, with their default value
    oblig_columns = {
        CONST.RESULTS_ORIGIN: '',
        CONST.RESULTS_DESTINATION: '',
        CONST.RESULTS_VALUE: 0.0}
    # Check if we have the mandatory columns (origin, destination, value)
    ok, msg = _checkNeededColumns(
        new_columns_names,
        list(oblig_columns.keys()),
        CONST.RESULTS_SHEET)
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    result_sheet.columns = new_columns_names
    # Optional columns we can have, with their default value
    facul_columns = {
        CONST.RESULTS_FREE_MIN: 0.0,
        CONST.RESULTS_FREE_MAX: 0.0}
    # Convert columns data to default data type, or None if NaN
    ok, msg = _castColumnType(
        result_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update Sankey
    return sankey.update_from_result_table(result_sheet)


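# Usage sketch (illustrative, hypothetical headers); note that unlike the
# other readers this one takes no options dict:
#
#   import pandas as pd
#   results_df = pd.DataFrame({
#       'Origine': ['Node A'],
#       'Destination': ['Node B'],
#       'Valeur': [42.0]})
#   ok, msg = xl_read_result_sheet(results_df, sankey)

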
def xl_read_analysis_sheet(
    analysis_sheet: pd.DataFrame,
    mfa_dict: dict,
    sankey: Sankey
):
    """
    Read Analysis sheet.

    Parameters
    ----------
    :param analysis_sheet: Excel sheet to read
    :type analysis_sheet: pd.DataFrame

    :param mfa_dict: MFA data after parsing
    :type mfa_dict: dict, modified

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    # Make column headers consistent with tag groups
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.ANALYSIS_SHEET, x, sankey, tags),
            analysis_sheet.columns))
    # Expected columns
    # Mandatory columns for this sheet, with their default value
    oblig_columns = {
        CONST.RESULTS_ORIGIN: '',
        CONST.RESULTS_DESTINATION: '',
        CONST.RESULTS_VALUE: 0.0}
    # All columns are here?
    ok, msg = _checkNeededColumns(
        new_columns_names,
        list(oblig_columns.keys()),
        CONST.ANALYSIS_SHEET)
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    analysis_sheet.columns = new_columns_names
    # Optional columns we can have, with their default value
    facul_columns = {}
    for tag in tags:
        facul_columns[tag] = ''
    # Convert columns data to default data type, or None if NaN
    ok, msg = _castColumnType(
        analysis_sheet, dict(oblig_columns, **facul_columns))
    if not ok:
        return ok, msg
    # Update Sankey - analysis part
    ok, msg = sankey.update_from_analysis_table(
        analysis_sheet)
    if not ok:
        return ok, msg
    # Update MFA data dict
    mfa_dict[CONST.ANALYSIS_SHEET] = analysis_sheet
    return True, ''


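# Usage sketch (illustrative): the analysis sheet shares the result sheet's
# origin / destination / value layout, plus optional tag columns, and the
# parsed table is also stored under mfa_dict[CONST.ANALYSIS_SHEET]:
#
#   mfa_dict = {}
#   ok, msg = xl_read_analysis_sheet(analysis_df, mfa_dict, sankey)

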
def xl_read_uncertainty_sheet(
    uncertainty_sheet: pd.DataFrame,
    mfa_dict: dict,
    sankey: Sankey
):
    """
    Read CONST.UNCERTAINTY_SHEET.

    Parameters
    ----------
    :param uncertainty_sheet: Excel sheet to read
    :type uncertainty_sheet: pd.DataFrame

    :param mfa_dict: MFA data after parsing
    :type mfa_dict: dict, modified

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    # Filter out empty columns: drop everything from the first unnamed column onward
    for i, col in enumerate(uncertainty_sheet.columns):  # iterate over column names
        if 'Unnamed' in col:
            uncertainty_sheet.drop(uncertainty_sheet.columns[i:], inplace=True, axis=1)
            break
    # Make column headers consistent with tag groups
    tags = list(sankey.taggs[CONST.TAG_TYPE_FLUX].keys())
    tags += list(sankey.taggs[CONST.TAG_TYPE_DATA].keys())
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.UNCERTAINTY_SHEET, x, sankey, tags),
            uncertainty_sheet.columns))
    # Expected columns
    # Mandatory columns for this sheet, with their default value
    oblig_columns = {
        CONST.UNCERTAINTY_ORIGIN: '',
        CONST.UNCERTAINTY_DESTINATION: ''}
    # All columns are here?
    ok, msg = _checkNeededColumns(
        new_columns_names,
        list(oblig_columns.keys()),
        CONST.UNCERTAINTY_SHEET)
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    uncertainty_sheet.columns = new_columns_names
    # Optional columns we can have, with default value and default position in sheet
    facul_columns = {}
    facul_column_pos = 2
    for col in CONST.UNCERTAINTY_SHEET_COLS:
        facul_columns['{}'.format(col)] = {'val': 0.0, 'pos': facul_column_pos}
        facul_column_pos += 1
    for tag in tags:
        facul_columns[tag] = {'val': '', 'pos': facul_column_pos}
        facul_column_pos += 1
    # Check if we need to add facultative columns
    for facul_column_name, facul_column in facul_columns.items():
        if facul_column_name not in uncertainty_sheet.columns:
            uncertainty_sheet.insert(
                facul_column['pos'], facul_column_name, facul_column['val'])
    # Convert to int, str, or None if NaN
    ok, msg = _castColumnType(
        uncertainty_sheet,
        dict(oblig_columns, **facul_columns),
        empty_to_default_value=True)
    if not ok:
        return ok, msg
    # Update Sankey - uncertainty part
    ok, msg = sankey.update_from_uncertainty_table(
        uncertainty_sheet)
    if not ok:
        return ok, msg
    mfa_dict[CONST.UNCERTAINTY_SHEET] = uncertainty_sheet
    return True, ''


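# Illustrative sketch of the optional-column backfill above ('Mean' is a
# hypothetical name; real ones come from CONST.UNCERTAINTY_SHEET_COLS):
#
#   import pandas as pd
#   df = pd.DataFrame({'Origine': ['A'], 'Destination': ['B']})
#   if 'Mean' not in df.columns:
#       df.insert(2, 'Mean', 0.0)  # same pattern as uncertainty_sheet.insert

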
def xl_read_conversions_sheet(
    conversions_sheet: pd.DataFrame,
    mfa_dict: dict,
    sankey: Sankey
):
    """
    Read CONST.CONVERSIONS_SHEET.
    TODO this sheet must be changed.

    Parameters
    ----------
    :param conversions_sheet: Excel sheet to read
    :type conversions_sheet: pd.DataFrame

    :param mfa_dict: MFA data after parsing
    :type mfa_dict: dict, modified

    :param sankey: Sankey struct constructed from input
    :type sankey: Sankey, modified

    Returns
    -------
    :return: (Success; Error message)
    :rtype: (bool; string)
    """
    # Make column headers consistent with tag groups
    new_columns_names = list(
        map(lambda x: consistantColName(CONST.CONVERSIONS_SHEET, x, sankey),
            conversions_sheet.columns))
    # Expected columns
    oblig_columns = {
        CONST.CONVERSIONS_LOCATION: '',
        CONST.CONVERSIONS_PRODUCT: '',
        CONST.CONVERSIONS_NATURAL_UNIT: '',
        CONST.CONVERSIONS_FACTOR: 0.0}
    # All columns are here?
    ok, msg = _checkNeededColumns(new_columns_names, list(oblig_columns.keys()), CONST.CONVERSIONS_SHEET)
    if not ok:
        return ok, msg
    # OK to update column names with consistent names
    conversions_sheet.columns = new_columns_names
    # # Optional columns we can have, with their default value
    # facul_columns = {
    #     CONST.CONVERSIONS_FACTOR_INV: 0.0}
    # # Convert columns data to default data type, or None if NaN
    # ok, msg = _castColumnType(
    #     conversions_sheet.iloc[1:], dict(oblig_columns, **facul_columns))
    # if not ok:
    #     return ok, msg
    conversions_sheet.replace({np.nan: None}, inplace=True)
    # Update Sankey - conversions part
    nodes = []
    ok, msg = sankey.update_from_conversions_table(conversions_sheet, nodes)
    if not ok:
        return ok, msg
    # Update MFA data dict
    nodes2tooltips = {}
    nodes2units_conv = {}
    nodes2natural_unit = {}
    for node in nodes:
        for localisation in node.unit.keys():
            name = localisation + '/' + node.name
            node2tooltips = []
            node2units_conv = [1.0]
            for tooltip in sankey.tooltips.keys():
                if tooltip in node.tooltips.keys():
                    node2tooltips.append(node.tooltips[tooltip].content)
                else:
                    node2tooltips.append(None)
            for unit in sankey.units.keys():
                other_factors = node.get_other_factors(localisation)
                try:
                    node2units_conv.append(other_factors[unit])
                except Exception:
                    node2units_conv.append(None)
            nodes2tooltips[name] = node2tooltips
            nodes2units_conv[name] = node2units_conv
            nodes2natural_unit[name] = node.get_natural_unit(localisation)
    mfa_dict[CONST.CONVERSIONS_SHEET] = {
        'tooltip_names': [[name, desc] for name, desc in sankey.tooltips.items()],
        'units_names': [[name, desc] for name, desc in sankey.units.items()],
        'nodes2tooltips': nodes2tooltips,
        'nodes2units_conv': nodes2units_conv,
        'nodes2natural_unit': nodes2natural_unit}
    return True, ''


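# Shape of the mfa_dict[CONST.CONVERSIONS_SHEET] entry built above, with
# illustrative values; nodes are keyed by '<localisation>/<node name>':
#
#   {
#       'tooltip_names': [['tooltip name', 'description'], ...],
#       'units_names': [['unit name', 'description'], ...],
#       'nodes2tooltips': {'FR/Node A': ['some tooltip', None]},
#       'nodes2units_conv': {'FR/Node A': [1.0, 0.8, None]},
#       'nodes2natural_unit': {'FR/Node A': 't'},
#   }

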
def write_excel_from_sankey(
    excel_filename: str,
    sankey: Sankey,
    mode: str = 'a',
    sheets_to_remove__names: list = [],
    **kwargs
):
    """
    Write a Sankey structure to an Excel file.

    Parameters
    ----------
    :param excel_filename: Name of Excel file to write
    :type excel_filename: str

    :param sankey: Sankey structure to write to Excel file
    :type sankey: Sankey

    Optional parameters
    -------------------
    :param mode: Writing mode (see pandas.ExcelWriter for more infos)
    :type mode: str, optional (defaults to 'a')

    :param sheets_to_remove__names: List of sheets (by name) to remove from Excel file if they are present
    :type sheets_to_remove__names: list[str, ...], optional (defaults to [])

    Hidden parameters
    -----------------
    :param additional_sheets: Dict of tables (pandas.DataFrame) to add in Excel file
    :type additional_sheets: Dict{str: pandas.DataFrame}
    """
    # Post-process function
    def _post_process_excel_file(
        excel_file
    ):
        # Extract excel book
        excel = excel_file.book
        sheets = excel._sheets
        # Remove sheets
        for sheet_to_remove__name in sheets_to_remove__names:
            try:
                sheet_to_remove__id = sheets.index(excel[sheet_to_remove__name])
                sheets.pop(sheet_to_remove__id)
            except Exception:
                pass
        # Read-me sheet must always be the first sheet
        try:
            read_me_sheet__id = excel.worksheets.index(excel['READ ME'])
            sheet = sheets.pop(read_me_sheet__id)
            sheets.insert(0, sheet)
        except Exception:
            pass
        # File is open and saved by xlwings to activate the formulas.
        # if has_xl_wings:
        #     try:
        #         app = xl.App(visible=False)
        #         book = app.books.open(excel_filename)
        #         book.save()
        #         app.kill()
        #     except Exception:
        #         pass
    # Write sheets from sankey
    if mode == 'a':
        with pd.ExcelWriter(excel_filename, engine='openpyxl', mode=mode, if_sheet_exists='replace') as excel_file:
            sankey.write_in_excel_file(excel_file, **kwargs)
            _post_process_excel_file(excel_file)
    else:
        with pd.ExcelWriter(excel_filename, engine='openpyxl', mode=mode) as excel_file:
            sankey.write_in_excel_file(excel_file, **kwargs)
            _post_process_excel_file(excel_file)


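# Usage sketch (illustrative; 'my_sankey.xlsx' is a hypothetical path). Mode
# 'w' creates the file from scratch, while the default mode 'a' appends to an
# existing workbook and replaces sheets that already exist:
#
#   write_excel_from_sankey('my_sankey.xlsx', sankey, mode='w')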