mrio-toolbox 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mrio-toolbox might be problematic. Click here for more details.
- mrio_toolbox/__init__.py +5 -0
- mrio_toolbox/_parts/_Axe.py +481 -0
- mrio_toolbox/_parts/_Part.py +1504 -0
- mrio_toolbox/_parts/__init__.py +3 -0
- mrio_toolbox/_parts/part_operations.py +50 -0
- mrio_toolbox/mrio.py +739 -0
- mrio_toolbox/utils/__init__.py +0 -0
- mrio_toolbox/utils/converters/__init__.py +2 -0
- mrio_toolbox/utils/converters/pandas.py +245 -0
- mrio_toolbox/utils/converters/xarray.py +141 -0
- mrio_toolbox/utils/loaders/__init__.py +3 -0
- mrio_toolbox/utils/loaders/_loader.py +256 -0
- mrio_toolbox/utils/loaders/_loader_factory.py +75 -0
- mrio_toolbox/utils/loaders/_nc_loader.py +148 -0
- mrio_toolbox/utils/loaders/_np_loader.py +112 -0
- mrio_toolbox/utils/loaders/_pandas_loader.py +102 -0
- mrio_toolbox/utils/loaders/_parameter_loader.py +341 -0
- mrio_toolbox/utils/savers/__init__.py +8 -0
- mrio_toolbox/utils/savers/_path_checker.py +19 -0
- mrio_toolbox/utils/savers/_to_folder.py +160 -0
- mrio_toolbox/utils/savers/_to_nc.py +52 -0
- mrio_toolbox-1.0.0.dist-info/LICENSE +674 -0
- mrio_toolbox-1.0.0.dist-info/METADATA +28 -0
- mrio_toolbox-1.0.0.dist-info/RECORD +26 -0
- mrio_toolbox-1.0.0.dist-info/WHEEL +5 -0
- mrio_toolbox-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import yaml
|
|
3
|
+
from mrio_toolbox.utils.loaders._nc_loader import NetCDF_Loader
|
|
4
|
+
from mrio_toolbox.utils.loaders._parameter_loader import Parameter_Loader
|
|
5
|
+
from mrio_toolbox.utils.loaders._pandas_loader import Pandas_Loader
|
|
6
|
+
from mrio_toolbox.utils.loaders._loader import Loader
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
log = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
def make_loader(**kwargs):
    """
    Initialize the appropriate loader based on the provided parameters.

    The loader is selected from the ``extension`` argument or,
    if it is not provided, from the extension of ``file``.

    Namely:
    - .nc files are loaded using the NetCDF_Loader
    - .yaml and .yml files are interpreted as loading instructions
    - .npy and .txt files are loaded using the Parameter_Loader
    - .csv files are loaded using the Parameter_Loader,
      unless ``loader_kwargs["pandas"]`` is truthy,
      in which case the Pandas_Loader is used
    - .xlsx files are loaded using the Pandas_Loader

    If neither a file nor an extension is provided, an empty Loader is returned.

    Parameters
    ----------
    file : path-like, optional
        Path to the file to load from.
    extension : str, optional
        Extension to use for selecting the loader, with its leading dot.
        If left empty, the extension of ``file`` is used.
    loader_kwargs : dict, optional
        Parameters for the underlying loader.
        For .csv files, the "pandas" entry selects the Pandas_Loader
        and is removed before the parameters are forwarded.
    **kwargs : dict
        All arguments are forwarded to the selected loader.

    Returns
    -------
    Loader
        Instance of the selected loader class.

    Raises
    ------
    ValueError
        If the file extension is missing or not supported.
    """
    file = kwargs.get("file",None)
    extension = kwargs.get("extension",None)

    if extension is None:
        if file is None:
            log.info("No file or extension provided.")
            log.info("An empty loader will be created.")
            return Loader()
        extension = os.path.splitext(file)[1]

    #Covers both an explicit empty extension and a file name without extension
    if extension == "":
        log.error("File extension missing.")
        raise ValueError("File extension missing.")
    if extension == ".nc":
        return NetCDF_Loader(**kwargs)
    if extension in [".yaml",".yml"]:
        return load_from_yaml(**kwargs)
    if extension in [".npy",".txt"]:
        return Parameter_Loader(**kwargs)
    if extension == ".csv":
        loader_kwargs = kwargs.get("loader_kwargs")
        if loader_kwargs:
            #Work on a copy: removing the "pandas" flag from the caller's
            #dictionary would change the loader selected by a later call
            loader_kwargs = dict(loader_kwargs)
            use_pandas = loader_kwargs.pop("pandas",False)
            kwargs["loader_kwargs"] = loader_kwargs
            if use_pandas:
                return Pandas_Loader(**kwargs)
        return Parameter_Loader(**kwargs)
    if extension == ".xlsx":
        return Pandas_Loader(**kwargs)
    #Fail loudly instead of silently returning None
    log.error(f"File extension {extension} not supported.")
    raise ValueError(f"File extension {extension} not supported.")
|
|
57
|
+
|
|
58
|
+
def load_from_yaml(**kwargs):
    """
    Create a loader based on yaml file instructions.

    The instruction file is parsed with yaml.safe_load
    and its content is used as arguments for make_loader.
    Explicit arguments override the parameters read from the file.
    The path to the instruction file is forwarded
    to make_loader as the ``instructions`` argument.

    Parameters
    ----------
    file : path-like
        Full path to the .yaml file
    **kwargs : dict
        Parameters overriding the content of the instruction file.

    Returns
    -------
    Loader
        Loader created by make_loader from the merged parameters.

    Raises
    ------
    KeyError
        If the file argument is missing.
    ValueError
        If the instruction file does not contain a mapping.
    """
    instructions = kwargs.pop("file")
    #f-string formatting also supports pathlib.Path instructions
    log.info(f"Get loading instructions from: {instructions}")
    with open(instructions) as f:
        #safe_load returns None for an empty file
        parameters = yaml.safe_load(f) or {}
    if not isinstance(parameters,dict):
        raise ValueError(f"Instruction file {instructions} does not contain a mapping.")
    if kwargs.get("extension") in [".yaml",".yml"]:
        #This extension describes the instruction file itself.
        #Forwarding it would make make_loader parse the data file as yaml.
        kwargs.pop("extension")
    for kwarg, value in kwargs.items():
        #Override parameters with kwargs
        log.debug(f"Override file parameter {kwarg} with explicit parameter {value}")
        parameters[kwarg] = value
    #Set as a dictionary entry to avoid a duplicate keyword error
    #if the parameters already hold an "instructions" entry
    parameters["instructions"] = instructions
    return make_loader(**parameters)
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
from mrio_toolbox.utils.loaders._loader import Loader
|
|
2
|
+
from mrio_toolbox.utils import converters
|
|
3
|
+
import xarray as xr
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
log = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
class NetCDF_Loader(Loader):
    """
    Loader reading MRIO data from a netCDF file.

    The file is opened with xarray and converted
    with the routines of converters.xarray.
    """
    def __init__(self, **kwargs):
        """
        Set up a loader for MRIO data in netCDF format.

        The arguments are handed to extract_basic_info and update_settings.
        NOTE(review): both are expected to come from the Loader base class,
        which is not visible from this file.

        Parameters
        ----------
        loader_kwargs : dict, optional
            Parameters passed to the xarray loader.
        file : path-like
            Full path to the netCDF file.
        groupings : dict, optional
            Aggregation on labels
        **kwargs : dict
            Metadata for the MRIO data.
            MRIO metadata are passed to associated parts.

        """
        self.extract_basic_info(**kwargs)
        super().__init__()
        self.update_settings(**kwargs)

    def load_mrio(self, file=None, **kwargs):
        """
        Open a netCDF file and store its content in the loader.

        The file is opened with xarray.open_dataset,
        using the loader_kwargs attribute as options.
        The resulting dataset is stored in the data attribute.
        The dataset is then converted with converters.xarray.make_mrio:
        the list of parts is stored for available_parts and
        the "data" entry of the conversion is passed to update_settings.

        Parameters
        ----------
        file : path-like, optional
            Full path to the file.
            If left empty, the file currently initialised is used.
        **kwargs : dict
            Not used by this method.

        Raises
        ------
        ValueError
            If no file is provided nor currently initialised.
        """
        if file is None:
            #Fall back on the file stored in the loader
            file = self.file
            if file is None:
                raise ValueError("No file provided.")

        log.info(f"Load MRIO data from {file}")
        self.data = xr.open_dataset(file, **self.loader_kwargs)
        conversion = converters.xarray.make_mrio(self.data)
        mrio_content, part_names = conversion
        self._available_parts = part_names
        self.update_settings(**mrio_content["data"])

    def load_part(self, file=None, **kwargs):
        """
        Load a part of the MRIO table.

        The part is extracted from the dataset stored in the data attribute
        with converters.xarray.make_part.

        Parameters
        ----------
        file : path, optional
            Full path to the data.
            If left empty, the current xarray Dataset is used.
        **kwargs : dict
            Arguments passed to get_file and converters.xarray.make_part,
            presumably including the name of the variable to load.

        Returns
        -------
        dict
            Data required to create a Part object
        """
        #Reload the dataset first if another file is requested
        self.get_file(file, **kwargs)
        part_data = converters.xarray.make_part(self.data, **kwargs)
        return part_data

    def get_file(self, file=None, **kwargs):
        """
        Check the file to load from and reload the data if needed.

        The data is reloaded with load_mrio when the requested file
        differs from both the file attribute and
        the "instructions" entry of the metadata.

        Parameters
        ----------
        file : path-like, optional
            User-defined path to the file, by default None
        **kwargs : dict
            Arguments passed to check_instructions.

        Returns
        -------
        path-like
            The file argument, as provided by the caller

        Raises
        ------
        ValueError
            If no file is provided nor currently loaded

        """
        #Check if new instructions are provided
        self.check_instructions(**kwargs)

        nothing_to_load = file is None and self.file is None
        if nothing_to_load:
            raise ValueError("No file provided.")

        instructions = self.metadata.get("instructions",None)

        differs_from_current = file != self.file and file != instructions
        if differs_from_current:
            #The current data is replaced by the content of the new file
            self.load_mrio(file)

        return file

    def available_parts(self, **kwargs):
        """
        List the parts available in the MRIO table.

        The list is the one stored by load_mrio.

        Returns
        -------
        list
            List of available parts
        """
        return self._available_parts
|
|
147
|
+
|
|
148
|
+
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Routine for loading MRIO Parts from .npy and .csv files
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
import logging
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
log = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
def load_file(file,extension=None,pandas=False,**kwargs):
    """
    Load data from a .npy, .csv, .txt, .xlsx, .yaml or .yml file.

    If neither the file name nor the extension argument holds an extension,
    the .npy, .csv, .txt and .xlsx extensions are tried in turn.

    Parameters
    ----------
    file : path-like
        Full path to the file
    extension : str, optional
        Expected extension of the file, with its leading dot.
        If left empty, the extension of the file is used.
        If the file has no extension, this extension is appended to it.
    pandas : bool, optional
        Whether .csv files are read with pandas, by default False
    kwargs : dict
        Additional parameters for the loaders

    Returns
    -------
    data : np.array
        Numerical data.
        Files read through pandas are returned as DataFrames
        and yaml files as the parsed yaml content.

    Raises
    ------
    FileNotFoundError
        If the file is not found in the specified path
        or if its extension does not match the extension argument
    ValueError
        If the file extension is not supported
    """
    supported = [".npy",".csv",".txt",".xlsx",".yaml",".yml"]
    #Path-like objects are converted to allow appending the extension
    file = os.fspath(file)
    file_extension = os.path.splitext(file)[1]
    if extension is None:
        extension = file_extension
    elif file_extension == "":
        file = file+extension
    elif file_extension != extension:
        raise FileNotFoundError(f"File {file} does not match the provided extension {extension}.")
    if extension == "":
        log.info("No extension provided. Trying .npy, .csv, .txt and .xlsx.")
        #Each loader appends its own extension to the file name.
        #Loader parameters are not forwarded here, as in the original code,
        #since they cannot be valid for all loaders at once.
        for loader in [load_npy,load_csv,load_txt,load_xlsx]:
            try:
                return loader(file)
            except FileNotFoundError:
                pass
        log.error(f"File {file} not found with extensions .npy, .csv, .txt or .xlsx.")
        raise FileNotFoundError(f"File {file} not found in the specified path.")
    if extension not in supported:
        log.error(f"File extension {extension} not supported.")
        raise ValueError(
            f"File extension {extension} not supported.\n"
            f"Supported extensions: {', '.join(supported)}"
            )
    if extension == ".npy":
        return load_npy(file,**kwargs)
    if extension == ".csv":
        return load_csv(file,pandas=pandas,**kwargs)
    if extension == ".txt":
        return load_txt(file,**kwargs)
    if extension == ".xlsx":
        return load_xlsx(file,**kwargs)
    #Only the yaml extensions are left at this point
    return load_yaml(file,**kwargs)
|
|
64
|
+
|
|
65
|
+
def load_yaml(file,**kwargs):
    """
    Read the content of a yaml file.

    The .yaml extension is appended to file names without extension.
    The file is parsed with yaml.safe_load.

    Parameters
    ----------
    file : str
        Full path to the file
    kwargs : dict
        Not used by this function.

    Returns
    -------
    Parsed content of the file, as returned by yaml.safe_load
    """
    suffix = os.path.splitext(file)[1]
    path = file+".yaml" if suffix == "" else file
    with open(path,"r") as stream:
        content = yaml.safe_load(stream)
    return content
|
|
70
|
+
|
|
71
|
+
def load_npy(file,**kwargs):
    """
    Read a .npy file using numpy.

    The .npy extension is appended to file names without extension.

    Parameters
    ----------
    file : str
        Full path to the file
    kwargs : dict
        Additional parameters passed to numpy.load

    Returns
    -------
    Content of the file, as returned by numpy.load
    """
    suffix = os.path.splitext(file)[1]
    path = file+".npy" if suffix == "" else file
    return np.load(path,**kwargs)
|
|
75
|
+
|
|
76
|
+
def load_csv(file,pandas=False,**kwargs):
    """
    Read a .csv file using pandas or numpy.

    The .csv extension is appended to file names without extension.

    If pandas, the file is read using pandas.read_csv.
    Unless a header is explicitly requested, the file is read without header,
    such that the first row is kept in the data.
    Otherwise, the file is read using numpy.loadtxt,
    with a comma as default delimiter.

    Parameters
    ----------
    file : str
        Full path to the file
    pandas : bool, optional
        Whether to read the file with pandas, by default False
    kwargs : dict
        Additional parameters passed to the underlying reader

    Returns
    -------
    pandas.DataFrame if pandas, numpy array otherwise
    """
    if os.path.splitext(file)[1] == "":
        file = file+".csv"
    if pandas:
        #Remove header if not provided
        #This is to avoid issues with the label autodetection
        kwargs.setdefault("header",None)
        return pd.read_csv(file,**kwargs)
    #The delimiter is removed from kwargs before unpacking them:
    #passing it twice to numpy.loadtxt raises a TypeError
    delimiter = kwargs.pop("delimiter",",")
    return np.loadtxt(file,delimiter=delimiter,**kwargs)
|
|
94
|
+
|
|
95
|
+
def load_txt(file,**kwargs):
    """
    Read a .txt file using numpy.

    The .txt extension is appended to file names without extension.
    The file is first read with the settings provided,
    with a tabulation as default delimiter.
    If the content cannot be converted, the file is read again
    as strings and returned as a list.

    Parameters
    ----------
    file : str
        Full path to the file
    kwargs : dict
        Additional parameters passed to numpy.loadtxt

    Returns
    -------
    numpy array, or nested list of str if the first reading fails
    """
    if os.path.splitext(file)[1] == "":
        file = file+".txt"
    #The delimiter is removed from kwargs before unpacking them:
    #passing it twice to numpy.loadtxt raises a TypeError
    delimiter = kwargs.pop("delimiter","\t")
    try:
        return np.loadtxt(file,delimiter=delimiter,**kwargs)
    except ValueError:
        #If the basic loading fails, it's probably a label file
        #The dtype is set in kwargs to avoid passing it twice
        kwargs["dtype"] = str
        return np.loadtxt(file,delimiter=delimiter,**kwargs).tolist()
|
|
104
|
+
|
|
105
|
+
def load_xlsx(file, **kwargs):
    """
    Read a .xlsx file using pandas.

    The .xlsx extension is appended to file names without extension.
    Unless a header is explicitly requested, the file is read without header.

    Parameters
    ----------
    file : str
        Full path to the file
    kwargs : dict
        Additional parameters passed to pandas.read_excel

    Returns
    -------
    Content of the file, as returned by pandas.read_excel
    """
    suffix = os.path.splitext(file)[1]
    path = file+".xlsx" if suffix == "" else file
    #Remove header if not provided
    #This is to avoid issues with the label autodetection
    options = dict(kwargs)
    options.setdefault("header",None)
    return pd.read_excel(path,**options)
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Routines for loading from Excel"""
|
|
2
|
+
|
|
3
|
+
from mrio_toolbox.utils.loaders._np_loader import load_file
|
|
4
|
+
from mrio_toolbox.utils.loaders._parameter_loader import Parameter_Loader
|
|
5
|
+
from mrio_toolbox.utils import converters
|
|
6
|
+
import os
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
log = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
class Pandas_Loader(Parameter_Loader):
    """
    Loader reading MRIO data through Pandas.
    """
    def __init__(self, **kwargs):
        """
        Set up a loader for MRIO data read through pandas.

        Used for loading data from .xlsx and .csv files.
        All arguments are passed to the Parameter_Loader initialisation.
        NOTE(review): the parameters below are interpreted by
        Parameter_Loader, which is not visible from this file.

        Parameters
        ----------
        loader_kwargs : dict, optional
            Parameters passed to the underlying loader.
            - .xlsx: pandas.read_excel
            - .csv: pandas.read_csv
        groupings : dict, optional
            Aggregation on labels
        labels : dict, optional
            Explicit dictionary of labels.
        dimensions : list of int, optional
            List of label names.
        path : str, optional
            Path to the data
            The following paths are recognized:
            - path
            - mrio_path
            - file
            - data_path
            - table/year/version
        labels_path : str, optional
            Path to the labels files
        parts : dict, optional
            Parts to load, with specific settings
        **kwargs : dict
            Metadata for the MRIO data.
            MRIO metadata are passed to associated parts.

        """
        super().__init__(**kwargs)

    def load_part(self, **kwargs):
        """
        Load a Part from explicit parameters.

        The arguments are first passed to update_attributes and get_file.
        The file is then read with load_file, with the pandas option on,
        and converted with converters.pandas.make_part.

        Labels are detected automatically, unless the loader parameters
        hold an "index_col" or "header" entry.

        Parameters
        ----------
        loader_kwargs : dict, optional
            Parameters for the underlying loader.
            By default, the loader_kwargs attribute is used.
        name : str, optional
            Name of the part.
            By default, the file name without its extension is used.
        **kwargs : dict
            Remaining arguments are passed to converters.pandas.make_part,
            after being updated with the preset settings stored
            for this part name, if any.

        Returns
        -------
        dict
            Data for creating the Part object

        Raises
        ------
        FileNotFoundError
            Presumably raised by get_file or load_file
            if no file can be found - to be confirmed.
        """
        #Update loader parameters
        self.update_attributes(**kwargs)

        file = self.get_file(**kwargs)

        loader_kwargs = kwargs.pop("loader_kwargs",self.loader_kwargs)

        file_stem = os.path.splitext(os.path.basename(file))[0]
        name = kwargs.pop("name",file_stem)

        log.info(f"Load part {name} from {file}")

        if name in self.part_settings:
            #Load preset settings
            kwargs.update(self.part_settings[name])

        #If labels are explicitly provided, do not autodetect
        explicit_labels = "index_col" in loader_kwargs or "header" in loader_kwargs
        autodetect_labels = not explicit_labels

        table = load_file(
            file,
            **loader_kwargs,
            extension=self.extension,
            pandas=True
            )
        return converters.pandas.make_part(
            table,
            name=name,
            label_detection=autodetect_labels,
            **kwargs
            )
|