mrio-toolbox 1.1.2-py3-none-any.whl → 1.1.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mrio-toolbox might be problematic.
- {mrio_toolbox-1.1.2.dist-info → mrio_toolbox-1.1.3.dist-info}/METADATA +1 -1
- mrio_toolbox-1.1.3.dist-info/RECORD +5 -0
- mrio_toolbox-1.1.3.dist-info/top_level.txt +1 -0
- __init__.py +0 -21
- _parts/_Axe.py +0 -539
- _parts/_Part.py +0 -1739
- _parts/__init__.py +0 -7
- _parts/part_operations.py +0 -57
- extractors/__init__.py +0 -20
- extractors/downloaders.py +0 -36
- extractors/emerging/__init__.py +0 -3
- extractors/emerging/emerging_extractor.py +0 -117
- extractors/eora/__init__.py +0 -3
- extractors/eora/eora_extractor.py +0 -132
- extractors/exiobase/__init__.py +0 -3
- extractors/exiobase/exiobase_extractor.py +0 -270
- extractors/extractors.py +0 -81
- extractors/figaro/__init__.py +0 -3
- extractors/figaro/figaro_downloader.py +0 -280
- extractors/figaro/figaro_extractor.py +0 -187
- extractors/gloria/__init__.py +0 -3
- extractors/gloria/gloria_extractor.py +0 -202
- extractors/gtap11/__init__.py +0 -7
- extractors/gtap11/extraction/__init__.py +0 -3
- extractors/gtap11/extraction/extractor.py +0 -129
- extractors/gtap11/extraction/harpy_files/__init__.py +0 -6
- extractors/gtap11/extraction/harpy_files/_header_sets.py +0 -279
- extractors/gtap11/extraction/harpy_files/har_file.py +0 -262
- extractors/gtap11/extraction/harpy_files/har_file_io.py +0 -974
- extractors/gtap11/extraction/harpy_files/header_array.py +0 -300
- extractors/gtap11/extraction/harpy_files/sl4.py +0 -229
- extractors/gtap11/gtap_mrio/__init__.py +0 -6
- extractors/gtap11/gtap_mrio/mrio_builder.py +0 -158
- extractors/icio/__init__.py +0 -3
- extractors/icio/icio_extractor.py +0 -121
- extractors/wiod/__init__.py +0 -3
- extractors/wiod/wiod_extractor.py +0 -143
- mrio.py +0 -899
- mrio_toolbox-1.1.2.dist-info/RECORD +0 -59
- mrio_toolbox-1.1.2.dist-info/top_level.txt +0 -6
- msm/__init__.py +0 -6
- msm/multi_scale_mapping.py +0 -863
- utils/__init__.py +0 -3
- utils/converters/__init__.py +0 -5
- utils/converters/pandas.py +0 -244
- utils/converters/xarray.py +0 -132
- utils/formatting/__init__.py +0 -0
- utils/formatting/formatter.py +0 -527
- utils/loaders/__init__.py +0 -7
- utils/loaders/_loader.py +0 -312
- utils/loaders/_loader_factory.py +0 -96
- utils/loaders/_nc_loader.py +0 -184
- utils/loaders/_np_loader.py +0 -112
- utils/loaders/_pandas_loader.py +0 -128
- utils/loaders/_parameter_loader.py +0 -386
- utils/savers/__init__.py +0 -11
- utils/savers/_path_checker.py +0 -37
- utils/savers/_to_folder.py +0 -165
- utils/savers/_to_nc.py +0 -60
- {mrio_toolbox-1.1.2.dist-info → mrio_toolbox-1.1.3.dist-info}/WHEEL +0 -0
- {mrio_toolbox-1.1.2.dist-info → mrio_toolbox-1.1.3.dist-info}/licenses/LICENSE +0 -0
utils/loaders/_loader.py
DELETED
@@ -1,312 +0,0 @@
"""
Central loading module for the mrio_toolbox package.

This module contains the central loading function for the mrio_toolbox package.
Depending on the loading mode, the function will call the appropriate loader.
"""

import os
import logging
import yaml

log = logging.Logger(__name__)

class Loader:
    """
    Parent class for loaders in the MRIO toolbox.

    The `Loader` class provides a base implementation for loading MRIO data.
    It includes methods for extracting metadata, updating settings, and managing
    groupings and labels. Specific loaders can inherit from this class to implement
    format-specific loading functionality.

    Instance variables
    ------------------
    metadata : dict
        Metadata associated with the loader.
    labels : dict
        Labels for the axes of the MRIO data.
    groupings : dict
        Groupings for the labels, defining higher-level aggregations.
    file : str or None
        Path to the file being loaded.
    loader_kwargs : dict
        Additional parameters for the loader.

    Methods
    -------
    extract_basic_info(**kwargs):
        Extract basic information such as path, labels, and groupings.
    update_settings(**settings):
        Update the loader settings with new parameters.
    load_mrio():
        Create an MRIO container based on the current parameters.
    load_part(**kwargs):
        Load an MRIO Part based on new or existing parameters.
    set_groupings(groupings):
        Update the groupings attribute of the loader.
    update_attributes(**kwargs):
        Update the current attributes of the loader.
    load_groupings(file, dimension=None, path=None):
        Load groupings from a file.
    set_labels(labels):
        Update the labels attribute of the loader.
    available_parts(**kwargs):
        Return the available parts in the MRIO data.
    check_instructions(**kwargs):
        Interpret the file argument for loading a part and check for instruction consistency.

    Notes
    -----
    This class is intended to be used as a base class for specific loaders.
    It provides general functionality for managing metadata, labels, and groupings,
    but does not implement actual data loading.
    """
    def __init__(
            self
            ):
        """
        Initialize a Loader object.

        Notes
        -----
        Loaders are created with format-specific parameters. They hold metadata and methods to load MRIO data.
        A loader is created using the base class if no specific loader is required,
        i.e., if the data is directly loaded from dict, pandas or xarray.
        In that case, the loader will fail when used,
        triggering the creation of a specific loader.
        """
        self.load_mrio()

    def extract_basic_info(self,**kwargs):
        """
        Extract basic information from the loader.

        The function will extract the path, labels and groupings from the loader.
        """
        self.loader_kwargs = kwargs.pop("loader_kwargs",dict())
        self.file = kwargs.get("file",None)
        self.groupings = kwargs.get("groupings",dict())
        self.labels = kwargs.get("labels",dict())
        #Remaining kwargs are metadata
        self.metadata = kwargs
        if isinstance(self.groupings,str):
            self.groupings = self.load_groupings(self.groupings)

    def update_settings(self,**settings):
        """
        Update the loader settings with new parameters
        """
        self.loader_kwargs.update(
            settings.pop("loader_kwargs",dict())
        )
        self.groupings.update(
            settings.pop("groupings",dict())
        )
        self.labels.update(
            settings.pop("labels",dict())
        )
        self.metadata.update(
            settings.pop("metadata",dict())
        )
        self.metadata.update(settings)


    def load_mrio(
            self
            ):
        """
        Create an MRIO container based on the new parameters

        Returns
        -------
        dict
            Dictionary of MRIO metadata
        """
        self.metadata = dict()
        self.labels = dict()
        self.groupings = dict()
        self.file = None
        pass

    def load_part(
            self,
            **kwargs
            ):
        """
        Load an MRIO Part based on new or existing parameters

        Returns
        -------
        dict
            Dictionary containing the Part data
        """
        raise FileNotFoundError("No proper loader was initialised.\n"+\
            "The loader needs to be reloaded with new instructions.")

    def set_groupings(self,groupings):
        """
        Update the groupings attribute of the loader

        Parameters
        ----------
        groupings : dict of dict of str
            Aggregation on labels
        """
        self.groupings = groupings

    def update_attributes(self,**kwargs):
        """
        Update the current attributes of the loader.

        The function will update the groupings, paths, labels and metadata attributes.
        """
        if "groupings" in kwargs:
            log.debug("Update groupings")
            self.groupings = kwargs.pop("groupings",self.groupings)

        self.extract_path(update=True,**kwargs)

        if "labels" in kwargs:
            log.debug("Update labels")
            self.format_labels(kwargs.pop("labels"))

        for kwarg in kwargs:
            log.debug(f"Override parameter {kwarg} with explicit parameter {kwargs[kwarg]}")
            self.metadata[kwarg] = kwargs[kwarg]

    def load_groupings(self,
                       file,
                       dimension=None,
                       path=None):
        """Load groupings from a file

        Parameters
        ----------
        file : str
            Name of the file to load
        dimension : str, optional
            Name of the dimension to load groupings for.
            By default (None), the file is interpreted as a preset
            of groupings on different dimension.
        path : path-like, optional
            Path where the file is stored.
            By default, the groupings are from the settings dir
            in the working dir.
        """
        def _check_groupings(groupings,dimension):
            """Check whether the groupings are consistent with the labels"""
            for key in groupings.keys():
                for item in groupings[key]:
                    if item not in self.labels[dimension]:
                        log.warning(
                            f"Item {item} not found in {dimension} labels"
                        )
                        groupings[key].remove(item)
                if len(groupings[key])==0:
                    log.warning(f"Group {key} is empty")
                    groupings.pop(key)
            return groupings

        def load_grouping(file,level,path):
            """Load a single grouping file"""
            path = os.path.join(path,level)
            with open(os.path.join(path,file+'.txt')) as f:
                group = f.read().splitlines()
            return {file:group}

        if path is None:
            path = os.path.join("parameters","groupings")

        #If no dimension is specified, interpret as a preset
        output = dict()
        if isinstance(file,str):
            log.info("Load groupings set from "+path+file)
            with open(os.path.join(path,file)) as f:
                groupings = yaml.safe_load(f)
        elif isinstance(file,dict):
            groupings = file
            output = self.groupings

        if dimension is None:
            dimensions = list(groupings.keys())
            output = dict()
            for level in dimensions:
                if isinstance(groupings[level],dict):
                    #Case the preset explicitly defines a grouping
                    groupings[level] = _check_groupings(
                        groupings[level],level
                    )
                    output[level] = groupings[level]
                    continue
                if isinstance(groupings[level],str):
                    groupings[level] = [groupings[level]]
                if isinstance(groupings[level],list):
                    #Otherwise, interpret as a list of groupings
                    output[level] = dict()
                    covered = []
                    for item in groupings[level]:
                        #Load all groupings
                        groups = load_grouping(
                            item,level,path
                        )
                        if any([group in covered for group in groups]):
                            duplicate = [
                                group for group in groups if group in covered
                            ]
                            log.warning("The following items are covered in "+\
                                "multiple groupings: "+duplicate)
                        covered += groups
                        output[level][item] = groups
        return output

    def set_labels(self,labels):
        """
        Update the labels attribute of the loader

        Parameters
        ----------
        labels : dict of str:list of str
            Labels of the axes
        """
        self.labels = labels

    def available_parts(self,**kwargs):
        """
        Return the available parts in the MRIO data
        """
        if self.file is None:
            raise FileNotFoundError("No file was provided.")

    def check_instructions(self,**kwargs):
        """
        Interpret the file argument for loading a part.

        This method solves the ambiguity between data files and optional
        .yaml instructions.
        If the file argument refers to an instruction file, it is compared
        to the current instructions.
        If the data file or instruction file differ from the ones currently loaded,
        an exception is raised to force a reload.

        Parameters
        ----------
        file : path-like
            User-provided file path
        kwargs : additional arguments

        Raises
        ------
        FileNotFoundError
            If the loader needs to be reloaded with new instructions.

        """
        #The 'instructions' attribute is used to check if the loader needs to be reloaded
        #It contains the reference to the potential yaml file used to load the data
        new_instructions = kwargs.get("instructions",None)
        ref_instructions = self.metadata.get("instructions",None)
        if new_instructions is not None and ref_instructions != new_instructions:
            #If the instructions differ from the current ones,
            #trigger a reload of the loader
            log.error("The loader needs to be reloaded with new instructions.")
            raise FileNotFoundError("The loader needs to be reloaded with new instructions.")
utils/loaders/_loader_factory.py
DELETED
@@ -1,96 +0,0 @@
"""
Initialize the appropriate loader based on the provided parameters.
"""
import os
import yaml
from mrio_toolbox.utils.loaders._nc_loader import NetCDF_Loader
from mrio_toolbox.utils.loaders._parameter_loader import Parameter_Loader
from mrio_toolbox.utils.loaders._pandas_loader import Pandas_Loader
from mrio_toolbox.utils.loaders._loader import Loader
import logging

log = logging.getLogger(__name__)

def make_loader(**kwargs):
    """
    Initialize the appropriate loader based on the provided parameters.

    If a file or data_file is provided,
    the function will attempt to determine the appropriate loader based on the file extension.

    Namely:
    - .nc files are loaded using the NetCDF_Loader
    - .yaml files are interpreted as loading instructions

    All non-netCDF files are loaded using the Parameter_Loader.
    """
    file = kwargs.get("file",None)
    if file is not None:
        file = os.path.abspath(file) # Avoid issue with UNIX/windows path
    extension = kwargs.get("extension",None)

    if extension is None:
        if file is None:
            log.info("No file or extension provided.")
            log.info("An empty loader will be created.")
            return Loader()
        extension = os.path.splitext(file)[1]
        if extension == "":
            log.error("File extension missing.")
            raise ValueError("File extension missing.")

    if extension == "":
        log.error("File extension missing.")
        raise ValueError("File extension missing.")
    if extension == ".nc":
        return NetCDF_Loader(**kwargs)
    if extension in [".yaml",".yml"]:
        return load_from_yaml(**kwargs)
    if extension in [".npy",".txt"]:
        return Parameter_Loader(**kwargs)
    if extension in [".csv"]:
        if "loader_kwargs" in kwargs:
            pandas = kwargs["loader_kwargs"].pop(
                "pandas",False
            )
            if pandas:
                return Pandas_Loader(**kwargs)
        return Parameter_Loader(**kwargs)
    if extension == ".xlsx":
        return Pandas_Loader(**kwargs)
    log.error(f"File extension {extension} not supported.")

def load_from_yaml(**kwargs):
    """
    Create a loader based on yaml file instructions.

    Parameters
    ----------
    file : path-like
        Full path to the .yaml file
    """
    instructions = kwargs.pop("file")
    log.info("Get loading instructions from: "+instructions)
    with open(instructions) as f:
        parameters = yaml.safe_load(f)
    for kwarg in kwargs:
        #Override parameters with kwargs
        log.debug(f"Override file parameter {kwarg} with explicit parameter {kwargs[kwarg]}")
        parameters[kwarg] = kwargs[kwarg]

    # Error handling
    if "path" not in parameters.keys():
        if "file" not in parameters.keys():
            log.info("No path provided, using current working directory instead")
            parameters["path"] = os.getcwd()
    elif not os.path.isdir(parameters["path"]):
        log.error("Provided path is not a directory")
        raise ValueError("Provided path is not a directory")


    return make_loader(instructions=instructions,**parameters)
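How the factory is meant to be called, as a hedged sketch grounded in the dispatch logic above; the file paths are placeholders.

from mrio_toolbox.utils.loaders._loader_factory import make_loader

# A .nc path yields a NetCDF_Loader; dispatch is driven by the extension.
nc_loader = make_loader(file="data/mrio_tables.nc")

# A .yaml path is read as loading instructions whose keys are fed back
# into make_loader, with the yaml path kept under "instructions".
yaml_loader = make_loader(file="settings/load_mrio.yaml")

# With neither file nor extension, the bare Loader is returned;
# it raises FileNotFoundError as soon as load_part is called.
empty_loader = make_loader()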
utils/loaders/_nc_loader.py
DELETED
@@ -1,184 +0,0 @@
"""
Provides the NetCDF_Loader class for loading MRIO data from netCDF files.
"""
from mrio_toolbox.utils.loaders._loader import Loader
from mrio_toolbox.utils import converters
import xarray as xr

import logging
import pandas as pd

log = logging.getLogger(__name__)

class NetCDF_Loader(Loader):
    """
    Class for loading MRIO data from a netCDF file.

    The `NetCDF_Loader` class extends the base `Loader` class to provide
    functionality for loading MRIO data stored in netCDF format. It uses the
    xarray library to load the data and extract metadata, labels, and groupings.

    Instance variables
    ------------------
    data : xarray.Dataset
        The loaded netCDF data stored as an xarray Dataset.
    _available_parts : list
        List of available parts in the MRIO data.
    metadata : dict
        Metadata extracted from the netCDF file.
    labels : dict
        Labels for the axes of the MRIO data.
    groupings : dict
        Groupings for the labels, defining higher-level aggregations.
    file : str or None
        Path to the netCDF file being loaded.
    loader_kwargs : dict
        Additional parameters passed to the xarray loader.

    Methods
    -------
    load_mrio(file=None, **kwargs):
        Load a netCDF file into memory and extract metadata.
    load_part(file=None, **kwargs):
        Load a specific part of the MRIO table.
    get_file(file=None, **kwargs):
        Get the file to load, updating the current file if necessary.
    available_parts(**kwargs):
        Return a list of available parts in the MRIO table.
    """

    def __init__(
            self,
            **kwargs
            ):
        """
        Initialize a NetCDF_Loader object.

        Parameters
        ----------
        loader_kwargs : dict, optional
            Parameters passed to the xarray loader.
        file : path-like
            Full path to the netCDF file.
        groupings : dict, optional
            Aggregation on labels
        **kwargs : dict
            Metadata for the MRIO data.
            MRIO metadata are passed to associated parts.

        """
        self.extract_basic_info(**kwargs)
        super().__init__()
        self.update_settings(**kwargs)

    def load_mrio(
            self,
            file = None,
            **kwargs
            ):
        """
        Load a netcdf file in the memory.

        This procedure is based on the xarray library.
        The xarray dataset is stored in the data attribute.
        The loader also extracts all metadata from the file.

        Parameters
        ----------
        file : path-like, optional
            Full path to the file.
            If left empty, the file currently initialised is used.

        Raises
        ------
        ValueError
            If the file is not provided.
        """

        if file is None:
            file = self.file

        if file is None:
            raise ValueError("No file provided.")

        log.info(f"Load MRIO data from {file}")
        self.data = xr.open_dataset(file, **self.loader_kwargs)
        mrio_data,list_of_parts = converters.xarray.make_mrio(self.data)
        self._available_parts = list_of_parts
        self.update_settings(**mrio_data["data"])


    def load_part(
            self,
            file = None,
            **kwargs
            ):
        """
        Load a part of the MRIO table.

        Parameters
        ----------
        name : str
            Name of the variable to load
        file : path, optional
            Full path to the data.
            If left empty, the current xarray Dataset is used.

        Returns
        -------
        dict
            Data required to create a Part object
        """
        self.get_file(file,**kwargs) #Update the file if needed
        return converters.xarray.make_part(
            self.data,**kwargs
        )

    def get_file(self, file=None, **kwargs):
        """
        Get the file to load.

        Parameters
        ----------
        file : path-like, optional
            User-defined path to the file, by default None

        Returns
        -------
        path-like
            Path to the file to load from

        Raises
        ------
        ValueError
            If no file is provided nor currently loaded

        """
        self.check_instructions(**kwargs)
        #Check if new instructions are provided

        if file is None and self.file is None:
            raise ValueError("No file provided.")

        instructions = self.metadata.get("instructions",None)

        if file != self.file and file != instructions:
            #If the file is different from the one currently loaded, the current data is replaced
            self.load_mrio(file)

        return file

    def available_parts(
            self,**kwargs
            ):
        """
        Return a list of available parts in the MRIO table.

        Returns
        -------
        list
            List of available parts
        """
        return self._available_parts
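A hedged usage sketch for NetCDF_Loader: the file name "mrio.nc" and the part name "Z" are placeholders, and loader_kwargs is simply forwarded to xarray.open_dataset.

from mrio_toolbox.utils.loaders._nc_loader import NetCDF_Loader

# Opening the dataset happens in __init__ via load_mrio.
loader = NetCDF_Loader(file="mrio.nc", loader_kwargs={})

# Variables detected in the dataset by converters.xarray.make_mrio.
print(loader.available_parts())

# Returns the dict used to build a Part object for one variable.
part_data = loader.load_part(name="Z")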
utils/loaders/_np_loader.py
DELETED
@@ -1,112 +0,0 @@
"""
Routine for loading MRIO Parts from .npy and .csv files
"""

import os
import numpy as np
import pandas as pd
import logging
import yaml

log = logging.getLogger(__name__)

def load_file(file,extension=None,pandas=False,**kwargs):
    """
    Load data from a .npy, .txt, .xlsx or .csv file.

    Parameters
    ----------
    file : path-like
        Full path to the file
    kwargs : dict
        Additional parameters for the loaders

    Returns
    -------
    data : np.array
        Numerical data

    Raises
    ------
    FileNotFoundError
        If the file is not found in the specified path
    ValueError
        If the file extension is not supported
    """
    if extension is None:
        extension = os.path.splitext(file)[1]
    elif os.path.splitext(file)[1] == "":
        file = file+extension
    elif os.path.splitext(file)[1] != extension:
        raise FileNotFoundError(f"File {file} does not match the provided extension {extension}.")
    if extension == "":
        log.info("No extension provided. Trying .npy, .csv and .txt.")
        for loader in [load_npy,load_csv,load_txt,load_xlsx]:
            try:
                return loader(file)
            except FileNotFoundError:
                pass
        log.error(f"File {file} not found with extensions .npy, .csv or .txt.")
        raise FileNotFoundError(f"File {file} not found in the specified path.")
    if extension not in [".npy",".csv",".txt",".xlsx",".yaml"]:
        log.error(f"File extension {extension} not supported.")
        raise ValueError(f"File extension {extension} not supported.\nSupported extensions: .npy, .csv, .txt")
    if extension == ".npy":
        return load_npy(file,**kwargs)
    if extension == ".csv":
        return load_csv(file,pandas=pandas,**kwargs)
    if extension == ".txt":
        return load_txt(file,**kwargs)
    if extension == ".xlsx":
        return load_xlsx(file,**kwargs)
    if extension == ".yaml":
        return load_yaml(file,**kwargs)

def load_yaml(file,**kwargs):
    if os.path.splitext(file)[1] == "":
        file = file+".yaml"
    with open(file,"r") as f:
        return yaml.safe_load(f)

def load_npy(file,**kwargs):
    if os.path.splitext(file)[1] == "":
        file = file+".npy"
    return np.load(file,**kwargs)

def load_csv(file,pandas=False,**kwargs):
    """
    Read a .csv file using pandas or numpy.

    If pandas, the file is read using pandas,
    such that labels are automatically extracted.
    Otherwise, the file is read using numpy and labels are loaded from another file.
    """
    if os.path.splitext(file)[1] == "":
        file = file+".csv"
    delimiter = kwargs.get("delimiter",",")
    if pandas:
        #Remove header if not provided
        #This is to avoid issues with the label autodetection
        kwargs["header"] = kwargs.get("header",None)
        return pd.read_csv(file,
                           **kwargs)
    return np.loadtxt(file,delimiter=delimiter,**kwargs)

def load_txt(file,**kwargs):
    if os.path.splitext(file)[1] == "":
        file = file+".txt"
    delimiter = kwargs.get("delimiter","\t")
    try:
        return np.loadtxt(file,delimiter=delimiter,**kwargs)
    except ValueError:
        #If the basic loading fails, it's probably a label file
        return np.loadtxt(file,dtype=str,delimiter=delimiter,**kwargs).tolist()

def load_xlsx(file, **kwargs):
    if os.path.splitext(file)[1] == "":
        file = file+".xlsx"
    #Remove header if not provided
    #This is to avoid issues with the label autodetection
    kwargs["header"] = kwargs.get("header",None)
    return pd.read_excel(file,
                         **kwargs)