mrio-toolbox 1.1.2__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mrio-toolbox might be problematic. Click here for more details.
- {mrio_toolbox-1.1.2.dist-info → mrio_toolbox-1.1.3.dist-info}/METADATA +1 -1
- mrio_toolbox-1.1.3.dist-info/RECORD +5 -0
- mrio_toolbox-1.1.3.dist-info/top_level.txt +1 -0
- __init__.py +0 -21
- _parts/_Axe.py +0 -539
- _parts/_Part.py +0 -1739
- _parts/__init__.py +0 -7
- _parts/part_operations.py +0 -57
- extractors/__init__.py +0 -20
- extractors/downloaders.py +0 -36
- extractors/emerging/__init__.py +0 -3
- extractors/emerging/emerging_extractor.py +0 -117
- extractors/eora/__init__.py +0 -3
- extractors/eora/eora_extractor.py +0 -132
- extractors/exiobase/__init__.py +0 -3
- extractors/exiobase/exiobase_extractor.py +0 -270
- extractors/extractors.py +0 -81
- extractors/figaro/__init__.py +0 -3
- extractors/figaro/figaro_downloader.py +0 -280
- extractors/figaro/figaro_extractor.py +0 -187
- extractors/gloria/__init__.py +0 -3
- extractors/gloria/gloria_extractor.py +0 -202
- extractors/gtap11/__init__.py +0 -7
- extractors/gtap11/extraction/__init__.py +0 -3
- extractors/gtap11/extraction/extractor.py +0 -129
- extractors/gtap11/extraction/harpy_files/__init__.py +0 -6
- extractors/gtap11/extraction/harpy_files/_header_sets.py +0 -279
- extractors/gtap11/extraction/harpy_files/har_file.py +0 -262
- extractors/gtap11/extraction/harpy_files/har_file_io.py +0 -974
- extractors/gtap11/extraction/harpy_files/header_array.py +0 -300
- extractors/gtap11/extraction/harpy_files/sl4.py +0 -229
- extractors/gtap11/gtap_mrio/__init__.py +0 -6
- extractors/gtap11/gtap_mrio/mrio_builder.py +0 -158
- extractors/icio/__init__.py +0 -3
- extractors/icio/icio_extractor.py +0 -121
- extractors/wiod/__init__.py +0 -3
- extractors/wiod/wiod_extractor.py +0 -143
- mrio.py +0 -899
- mrio_toolbox-1.1.2.dist-info/RECORD +0 -59
- mrio_toolbox-1.1.2.dist-info/top_level.txt +0 -6
- msm/__init__.py +0 -6
- msm/multi_scale_mapping.py +0 -863
- utils/__init__.py +0 -3
- utils/converters/__init__.py +0 -5
- utils/converters/pandas.py +0 -244
- utils/converters/xarray.py +0 -132
- utils/formatting/__init__.py +0 -0
- utils/formatting/formatter.py +0 -527
- utils/loaders/__init__.py +0 -7
- utils/loaders/_loader.py +0 -312
- utils/loaders/_loader_factory.py +0 -96
- utils/loaders/_nc_loader.py +0 -184
- utils/loaders/_np_loader.py +0 -112
- utils/loaders/_pandas_loader.py +0 -128
- utils/loaders/_parameter_loader.py +0 -386
- utils/savers/__init__.py +0 -11
- utils/savers/_path_checker.py +0 -37
- utils/savers/_to_folder.py +0 -165
- utils/savers/_to_nc.py +0 -60
- {mrio_toolbox-1.1.2.dist-info → mrio_toolbox-1.1.3.dist-info}/WHEEL +0 -0
- {mrio_toolbox-1.1.2.dist-info → mrio_toolbox-1.1.3.dist-info}/licenses/LICENSE +0 -0
utils/__init__.py
DELETED
utils/converters/__init__.py
DELETED
utils/converters/pandas.py
DELETED
|
@@ -1,244 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Routines for converting between Pandas DataFrames and Parts objects.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import pandas as pd
|
|
6
|
-
import numpy as np
|
|
7
|
-
|
|
8
|
-
def to_pandas(part):
    """Convert a Part object into a pandas DataFrame.

    Only Parts with 1 or 2 dimensions map directly onto a DataFrame;
    higher-dimensional Parts are flattened first and converted
    recursively.

    Parameters
    ----------
    part : Part
        Part object to convert.

    Returns
    -------
    pd.DataFrame
        DataFrame indexed by the Part's axis labels.
    """
    ndim = part.ndim
    if ndim == 2:
        row_labels = part.axes[0].label(True)
        col_labels = part.axes[1].label(True)
        return pd.DataFrame(part.data, index=row_labels, columns=col_labels)
    if ndim == 1:
        return pd.DataFrame(part.data, index=part.axes[0].label(True))
    # More than two dimensions: flatten, then convert the 2D result.
    return to_pandas(part.flatten())
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def make_part(df, name="from_df",
              label_detection=False,
              **kwargs):
    """Load a Part object from a Pandas DataFrame.

    Parameters
    ----------
    df : DataFrame
        DataFrame to load
    name : str, optional
        Name of the data variable to load, by default "from_df".
        This can be left empty if there's a single variable in the DataFrame.
    label_detection : bool, optional
        Automatically detect labels, by default False.
        If True, the DataFrame is scanned to detect labels
        (defined as non-numeric data).
    **kwargs
        ``groupings`` and ``metadata`` dicts are consumed directly;
        any remaining keyword argument is stored as metadata.

    Returns
    -------
    dict
        Data required to create the Part object
    """
    if label_detection:
        df = autodecode_labels(df)

    # One label set per axis: always the index, plus the columns for 2D data.
    axis_labels = [convert_labels(df.index)]
    if df.ndim != 1:
        axis_labels.append(convert_labels(df.columns))

    part_data = {
        "data": df.to_numpy(),
        "labels": disambiguate_labels(axis_labels),
        "groupings": kwargs.pop("groupings", dict()),
        "metadata": kwargs.pop("metadata", dict()),
        "name": name,
    }
    # Remaining keyword arguments are treated as extra metadata.
    part_data["metadata"].update(kwargs)
    return part_data
|
|
63
|
-
|
|
64
|
-
def autodecode_labels(df: pd.DataFrame) -> pd.DataFrame:
    """Automatically detect the labels from a DataFrame.

    This is done by identifying the indices and columns
    with non-numeric values: the largest bottom-right rectangle of
    purely numeric cells is taken as the data, and the rows above /
    columns to the left of it are turned into (Multi)Index labels.
    """
    def test_selection(df,row,col):
        """Test if a selection is numeric"""
        # NOTE(review): the loop variable shadows the `col` parameter;
        # this scans only the single column `col` from row `row` down,
        # not the full rectangle — confirm that is the intent.
        try:
            for col in df.iloc[row:,col]:
                pd.to_numeric(col)
            return True
        except ValueError:
            return False

    def try_reduce(df,row,col):
        """Try reducing the rectangle to the right or down"""
        # Move the top-left corner one step at a time, preferring down,
        # then right, then diagonally, until the selection is numeric.
        if test_selection(df,row+1,col):
            return row+1,col
        elif test_selection(df,row,col+1):
            return row,col+1
        else:
            return row+1,col+1

    def try_expand(df,row,col):
        """Try expanding the rectangle to the left or up"""
        # NOTE(review): despite the docstring, this moves the corner
        # down/right (row+1 / col+1) when the adjacent selection is
        # NON-numeric — verify direction against the original intent.
        if not test_selection(df,row+1,col):
            return row+1,col
        elif not test_selection(df,row,col+1):
            return row,col+1
        else:
            return row, col

    def find_rectangle(df):
        """Find the largest rectangle with only numeric data"""
        row = 0
        col = 0
        # Shrink from the top-left until only numeric data remains.
        while not test_selection(df,row,col):
            row,col = try_reduce(df,row,col)
        # NOTE(review): after the first loop test_selection(df,row,col)
        # is True, so this second loop's condition is immediately False
        # and its body never runs — the expansion step looks dead.
        while not test_selection(df,row,col):
            #After the first while loop, we found only numeric data
            #We now expand to the top and the left
            #To make sure we didn't crop numerical data
            row,col = try_expand(df,row,col)
        return row,col

    #First, we find the largest rectangle with only numeric data
    row,col = find_rectangle(df)

    #And we remove potential nan axes and ensure types are ok
    data = pd.DataFrame(
        data=df.iloc[row:,col:],
        dtype=np.float64)

    #We count Nan axes as they offset label names
    # (all-NaN rows/columns shift where the label-name cells sit)
    row_offset = data.map(
        np.isnan
    ).all(1).sum()
    col_offset = data.map(
        np.isnan
    ).all(0).sum()

    data = data.dropna(axis=0,how="all")
    data = data.dropna(axis=1,how="all")

    #Then, we build the labels
    if col>0:
        # Column-level names sit in the column just left of the data.
        col_names = df.iloc[:row,col-1+col_offset].to_list()
        if row > 1:
            # Several header rows: build a MultiIndex from the unique
            # values of each header row.
            labels = []
            sel = df.iloc[:row,col:].transpose()
            for column in sel.columns:
                labels.append(sel[column].dropna().unique())
            columns = pd.MultiIndex.from_product(
                labels,
                names = col_names)
        else:
            # Single header row: a flat Index is enough.
            columns = pd.Index(
                df.iloc[
                    :row,col:
                ].values.flatten(),
                name = col_names[0]
            )

    else:
        columns = None
    if row > 0:
        # Index-level names sit in the row just above the data.
        index_names = df.iloc[row-1+row_offset,:col].to_list()
        if col > 1:
            # Several label columns: build a MultiIndex from the unique
            # values of each label column.
            labels = []
            sel = df.iloc[row+row_offset:,:col]
            for column in sel.columns:
                labels.append(
                    list(sel[column].dropna().unique())
                )
            index = pd.MultiIndex.from_product(
                labels,
                names = index_names)
        else:
            index = pd.Index(
                list(
                    df.iloc[
                        row:,:col
                    ].values.flatten()
                ),
                name = index_names[0]
            )
    else:
        index = None

    #We build the formatted DataFrame
    output = pd.DataFrame(
        data = data.values,
        columns=columns,
        index = index
    )

    return output
|
|
184
|
-
|
|
185
|
-
def convert_labels(index):
    """Convert a Pandas Index into a list of {name: values} label dicts.

    A MultiIndex yields one dict per level (unnamed levels are called
    ``level_<i>``); a flat Index yields a single dict, keyed by ``0``
    when the Index has no name.

    Parameters
    ----------
    index : Index
        Pandas Index to convert
    """
    if isinstance(index, pd.MultiIndex):
        converted = []
        for position, level in enumerate(index.levels):
            level_name = index.names[position]
            if level_name is None:
                level_name = f"level_{position}"
            converted.append({level_name: list(level.values)})
        return converted
    key = 0 if index.name is None else index.name
    return [{key: list(index.array)}]
|
|
206
|
-
|
|
207
|
-
def disambiguate_labels(labels):
    """Disambiguate the labels.

    This allows solving label ambiguity if the name was incorrectly
    loaded: when two levels carry the same list of values under
    different names, the name of the first occurrence is reused as the
    canonical one.

    Parameters
    ----------
    labels : list of list of dict
        One list per axis; each dict maps a single level name to its
        list of values.

    Returns
    -------
    list of list of dict
        Labels with duplicated value lists unified under one name.
    """
    ordered = []
    cleared = dict()  # level name -> values, first time each name is seen
    values = []       # every value list registered so far
    for label in labels:
        ordered.append([])
        for level_label in label:
            # Each level dict holds exactly one {name: values} pair.
            name, value = next(iter(level_label.items()))
            if name in cleared:
                # Name already registered: keep the level as-is.
                # (The original implementation silently dropped such
                # levels, which lost one entry per repeated name.)
                ordered[-1].append(level_label)
                continue
            if value in values:
                # Same values under a new name: reuse the first
                # occurrence's name. dict.keys() is not subscriptable,
                # hence the list() call (the original
                # `cleared.keys()[...]` raised TypeError here).
                ref_name = list(cleared.keys())[list(cleared.values()).index(value)]
                ordered[-1].append({ref_name: value})
            else:
                ordered[-1].append(level_label)
            cleared[name] = value
            values.append(value)
    return ordered
|
utils/converters/xarray.py
DELETED
|
@@ -1,132 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Routines for converting between xarray DataArrays and Parts objects.
|
|
3
|
-
|
|
4
|
-
"""
|
|
5
|
-
import pandas as pd
|
|
6
|
-
import xarray as xr
|
|
7
|
-
import numpy as np
|
|
8
|
-
|
|
9
|
-
def to_DataArray(part):
    """Convert a Part object to an xarray DataArray.

    Labels are directly passed to the DataArray as coords. If
    developing the Part changed its dimensions, the original
    dimensions are stored flat in the attributes, joined with
    ``"_sep_"`` markers, because netCDF files do not support
    multi-level attributes.

    Returns
    -------
    xr.DataArray
        Corresponding DataArray
    """
    # Force non-squeeze so singleton dimensions stay aligned with coords.
    developed = part.develop(squeeze=False)
    old_dims = part.get_dimensions()
    new_dims = developed.get_dimensions()

    attrs = dict()
    if old_dims != new_dims:
        # Encode the original dimensions as a flat list with "_sep_"
        # separators between axes (netCDF attributes must be flat).
        flat_dims = []
        for axe in old_dims:
            flat_dims.extend(axe)
            flat_dims.append("_sep_")
        attrs["_original_dimensions"] = flat_dims[:-1]  # drop last separator

    coords = [axe.label(True) for axe in developed.axes]
    attrs.update(part.metadata)
    return xr.DataArray(
        data=developed.data,
        name=part.name,
        attrs=attrs,
        coords=coords,
    )
|
|
44
|
-
|
|
45
|
-
def to_DataSet(mrio):
    """Convert an MRIO object into an xarray Dataset.

    The MRIO metadata become the Dataset attributes, its labels the
    coordinates, and every part is stored as a data variable under
    its name.
    """
    dataset = xr.Dataset(coords=mrio.labels, attrs=mrio.metadata)
    for part_name in mrio.parts:
        dataset[part_name] = mrio.parts[part_name].to_xarray()
    return dataset
|
|
53
|
-
|
|
54
|
-
def make_part(data, **kwargs):
    """Load a Part object from an xarray DataArray.

    Parameters
    ----------
    data : DataArray or Dataset
        Part object to load.
    name : str, optional
        Name of the data variable to load, by default None.
        This can be left empty if there's a single variable
        in the Dataset.

    Returns
    -------
    dict
        Data required to create the Part object
    """
    if isinstance(data, xr.Dataset):
        # Extract the single DataArray from the Dataset.
        variables = list(data.data_vars)
        if len(variables) > 1:
            # Ambiguous: the caller must name the variable to extract.
            name = kwargs.get("name", None)
        else:
            name = variables[0]
        data = data[name]
    elif isinstance(data, xr.DataArray):
        name = data.name

    # One {name: values} dict per dimension, in dimension order.
    labels = []
    for dim in data.dims:
        index = data.indexes[dim]
        labels.append({index.name: index.values.tolist()})

    part_data = {
        "data": data.to_numpy(),
        "name": name,
        "labels": labels,
        "metadata": kwargs.get("metadata", dict()),
        "groupings": kwargs.get("groupings", dict()),
    }
    # DataArray attributes are merged into the metadata.
    part_data["metadata"].update(data.attrs)
    return part_data
|
|
102
|
-
|
|
103
|
-
def make_mrio(data, **kwargs):
    """Load an MRIO object from an xarray DataSet.

    Parameters
    ----------
    data : Dataset
        Dataset to load.
    parts : list of str, optional
        Names of the data variables to load; defaults to all of them.
    groupings, metadata : dict, optional
        Extra groupings / metadata merged with the Dataset's own.

    Returns
    -------
    tuple
        ``({"data": mrio_data}, to_load)`` where ``mrio_data`` holds
        labels, groupings, metadata and an empty parts dict, and
        ``to_load`` lists the variables to extract.
    """
    # Extract the data from the xarray
    to_load = kwargs.get("parts", list(data.data_vars))

    labels = {coord: data[coord].values.tolist() for coord in data.coords}

    # Copy before merging: the original implementation aliased
    # data.attrs (and the caller's groupings dict) and updated them in
    # place, leaking the merged metadata back into the input Dataset.
    groupings = dict(kwargs.get("groupings", dict()))
    groupings.update(data.attrs.get("groupings", dict()))

    metadata = dict(data.attrs)
    metadata.update(kwargs.get("metadata", dict()))

    mrio_data = {
        "labels": labels,
        "groupings": groupings,
        "metadata": metadata,
        "parts": dict(),
    }
    return {"data": mrio_data}, to_load
|
utils/formatting/__init__.py
DELETED
|
File without changes
|