mrio-toolbox 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mrio-toolbox might be problematic. Click here for more details.

@@ -0,0 +1,75 @@
1
+ import os
2
+ import yaml
3
+ from mrio_toolbox.utils.loaders._nc_loader import NetCDF_Loader
4
+ from mrio_toolbox.utils.loaders._parameter_loader import Parameter_Loader
5
+ from mrio_toolbox.utils.loaders._pandas_loader import Pandas_Loader
6
+ from mrio_toolbox.utils.loaders._loader import Loader
7
+ import logging
8
+
9
+ log = logging.getLogger(__name__)
10
+
11
def make_loader(**kwargs):
    """
    Initialize the appropriate loader based on the provided parameters.

    The loader is selected from the explicit ``extension`` argument or,
    when it is omitted, from the extension of ``file``:

    - no file and no extension: an empty Loader
    - .nc: NetCDF_Loader
    - .yaml / .yml: the file is read as loading instructions
      (see load_from_yaml)
    - .npy / .txt: Parameter_Loader
    - .csv: Pandas_Loader if ``loader_kwargs["pandas"]`` is truthy,
      Parameter_Loader otherwise
    - .xlsx: Pandas_Loader

    Parameters
    ----------
    file : path-like, optional
        Path to the data or instruction file.
    extension : str, optional
        File extension, including the leading dot.
        If omitted, it is derived from ``file``.
    loader_kwargs : dict, optional
        Parameters for the underlying reader.
        The ``pandas`` entry is consumed here and not forwarded.
    **kwargs : dict
        All keyword arguments are forwarded to the selected loader.

    Returns
    -------
    Loader
        Instance of the selected loader class.

    Raises
    ------
    ValueError
        If the extension is empty or not supported.
    """
    file = kwargs.get("file")
    extension = kwargs.get("extension")

    if extension is None:
        if file is None:
            log.info("No file or extension provided.")
            log.info("An empty loader will be created.")
            return Loader()
        extension = os.path.splitext(file)[1]

    if extension == "":
        log.error("File extension missing.")
        raise ValueError("File extension missing.")

    if extension == ".nc":
        return NetCDF_Loader(**kwargs)
    if extension in (".yaml", ".yml"):
        return load_from_yaml(**kwargs)
    if extension in (".npy", ".txt"):
        return Parameter_Loader(**kwargs)

    if extension in (".csv", ".xlsx"):
        use_pandas = False
        if kwargs.get("loader_kwargs") is not None:
            # Work on a copy so that the caller's dictionary is not mutated.
            # The "pandas" flag is a routing switch only: the pandas loader
            # sets it itself when reading, so it must not be forwarded.
            loader_kwargs = dict(kwargs["loader_kwargs"])
            use_pandas = loader_kwargs.pop("pandas", False)
            kwargs["loader_kwargs"] = loader_kwargs
        if extension == ".xlsx" or use_pandas:
            return Pandas_Loader(**kwargs)
        return Parameter_Loader(**kwargs)

    # Fail loudly instead of silently returning None
    log.error(f"File extension {extension} not supported.")
    raise ValueError(f"File extension {extension} not supported.")
57
+
58
def load_from_yaml(**kwargs):
    """
    Create a loader based on yaml file instructions.

    The content of the yaml file is used as keyword arguments for
    make_loader. Explicit keyword arguments override the file content.

    Parameters
    ----------
    file : path-like
        Full path to the .yaml file
    **kwargs : dict
        Parameters overriding those read from the file.

    Returns
    -------
    Loader
        Loader created by make_loader from the merged parameters.
        The path to the yaml file is passed as ``instructions``.

    Raises
    ------
    KeyError
        If no ``file`` argument is provided.
    ValueError
        If the yaml file does not contain a mapping.
    """
    instructions = kwargs.pop("file")
    # f-string formatting also works for pathlib.Path objects
    log.info(f"Get loading instructions from: {instructions}")
    with open(instructions, encoding="utf-8") as f:
        # An empty file is parsed as None: treat it as no parameters
        parameters = yaml.safe_load(f) or {}
    if not isinstance(parameters, dict):
        raise ValueError(
            f"Loading instructions in {instructions} must be a mapping."
        )

    if kwargs.get("extension") in (".yaml", ".yml"):
        # This extension describes the instruction file itself.
        # Forwarding it would make the data file be parsed as yaml again.
        kwargs.pop("extension")

    for key, value in kwargs.items():
        # Override parameters with kwargs
        log.debug(f"Override file parameter {key} with explicit parameter {value}")
        parameters[key] = value

    # Set last so that an "instructions" entry in the file cannot collide
    parameters["instructions"] = instructions
    return make_loader(**parameters)
@@ -0,0 +1,148 @@
1
+ from mrio_toolbox.utils.loaders._loader import Loader
2
+ from mrio_toolbox.utils import converters
3
+ import xarray as xr
4
+
5
+ import logging
6
+ import pandas as pd
7
+
8
+ log = logging.getLogger(__name__)
9
+
10
class NetCDF_Loader(Loader):
    """
    Class for loading MRIO data from a netCDF file.
    """

    def __init__(
            self,
            **kwargs
            ):
        """
        Loader for MRIO data in netCDF format.

        Parameters
        ----------
        loader_kwargs : dict, optional
            Parameters passed to the xarray loader.
        file : path-like
            Full path to the netCDF file.
        groupings : dict, optional
            Aggregation on labels
        **kwargs : dict
            Metadata for the MRIO data.
            MRIO metadata are passed to associated parts.

        """
        # The basic info is extracted before the parent initialisation;
        # this order is kept as is since the parent class is defined elsewhere.
        self.extract_basic_info(**kwargs)
        super().__init__()
        self.update_settings(**kwargs)

    def load_mrio(
            self,
            file=None,
            **kwargs
            ):
        """
        Load a netcdf file in the memory.

        This procedure is based on the xarray library.
        The xarray dataset is stored in the data attribute.
        The list of parts found in the file is stored for available_parts
        and the loader settings are updated from the file content.

        Parameters
        ----------
        file : path-like, optional
            Full path to the file.
            If left empty, the file currently initialised is used.
        **kwargs : dict
            Accepted for interface compatibility, not used.

        Raises
        ------
        ValueError
            If the file is not provided.
        """
        if file is None:
            file = self.file

        if file is None:
            raise ValueError("No file provided.")

        log.info(f"Load MRIO data from {file}")
        self.data = xr.open_dataset(file, **self.loader_kwargs)
        mrio_data, list_of_parts = converters.xarray.make_mrio(self.data)
        self._available_parts = list_of_parts
        self.update_settings(**mrio_data["data"])

    def load_part(
            self,
            file=None,
            **kwargs
            ):
        """
        Load a part of the MRIO table.

        Parameters
        ----------
        name : str
            Name of the variable to load
        file : path, optional
            Full path to the data.
            If left empty, the current xarray Dataset is used.
        **kwargs : dict
            Passed to get_file and to the xarray part converter.

        Returns
        -------
        dict
            Data required to create a Part object
        """
        self.get_file(file, **kwargs)  # Update the file if needed
        return converters.xarray.make_part(
            self.data, **kwargs
            )

    def get_file(self, file=None, **kwargs):
        """
        Get the file to load.

        If the requested file differs from both the current file and
        the instruction file, the data is reloaded from it.

        Parameters
        ----------
        file : path-like, optional
            User-defined path to the file, by default None

        Returns
        -------
        path-like
            Path to the file to load from

        Raises
        ------
        ValueError
            If no file is provided nor currently loaded

        """
        # Check if new instructions are provided
        self.check_instructions(**kwargs)

        if file is None and self.file is None:
            raise ValueError("No file provided.")

        instructions = self.metadata.get("instructions", None)

        if file != self.file and file != instructions:
            # If the file is different from the one currently loaded,
            # the current data is replaced.
            # NOTE(review): with file=None and instructions set, this reloads
            # the current file on every call - confirm this is intended.
            self.load_mrio(file)

        if file is None or file == instructions:
            # No data file was requested explicitly:
            # the data comes from the current file
            return self.file
        return file

    def available_parts(
            self, **kwargs
            ):
        """
        Return a list of available parts in the MRIO table.

        The list is set by load_mrio.

        Returns
        -------
        list
            List of available parts
        """
        return self._available_parts
147
+
148
+
@@ -0,0 +1,112 @@
1
+ """
2
+ Routines for loading MRIO Parts from .npy, .csv, .txt, .xlsx and .yaml files
3
+ """
4
+
5
+ import os
6
+ import numpy as np
7
+ import pandas as pd
8
+ import logging
9
+ import yaml
10
+
11
+ log = logging.getLogger(__name__)
12
+
13
def load_file(file, extension=None, pandas=False, **kwargs):
    """
    Load data from a .npy, .txt, .xlsx, .csv or .yaml file.

    Parameters
    ----------
    file : path-like
        Full path to the file
    extension : str, optional
        Extension of the file, including the leading dot.
        If the file has no extension, this one is appended.
        If both are missing, .npy, .csv, .txt and .xlsx are tried in turn.
    pandas : bool, optional
        Whether .csv files are read with pandas instead of numpy,
        by default False
    kwargs : dict
        Additional parameters for the loaders

    Returns
    -------
    data
        Output of the format-specific loader
        (load_npy, load_csv, load_txt, load_xlsx or load_yaml).

    Raises
    ------
    FileNotFoundError
        If the file is not found in the specified path
        or if its extension does not match the provided one
    ValueError
        If the file extension is not supported
    """
    # Plain string path, such that extensions can be appended
    file = os.fspath(file)
    suffix = os.path.splitext(file)[1]

    if extension is None:
        extension = suffix
    elif suffix == "":
        file = file + extension
    elif suffix != extension:
        raise FileNotFoundError(f"File {file} does not match the provided extension {extension}.")

    if extension == "":
        log.info("No extension provided. Trying .npy, .csv, .txt and .xlsx.")
        for candidate in (".npy", ".csv", ".txt", ".xlsx"):
            if os.path.isfile(file + candidate):
                # Load through the regular route,
                # such that pandas and kwargs are not dropped
                return load_file(
                    file + candidate, extension=candidate,
                    pandas=pandas, **kwargs
                )
        log.error(f"File {file} not found with extensions .npy, .csv, .txt or .xlsx.")
        raise FileNotFoundError(f"File {file} not found in the specified path.")

    if extension == ".npy":
        return load_npy(file, **kwargs)
    if extension == ".csv":
        return load_csv(file, pandas=pandas, **kwargs)
    if extension == ".txt":
        return load_txt(file, **kwargs)
    if extension == ".xlsx":
        return load_xlsx(file, **kwargs)
    if extension in (".yaml", ".yml"):
        return load_yaml(file, **kwargs)

    log.error(f"File extension {extension} not supported.")
    raise ValueError(
        f"File extension {extension} not supported.\n"
        "Supported extensions: .npy, .csv, .txt, .xlsx, .yaml, .yml"
    )
64
+
65
def load_yaml(file, **kwargs):
    """
    Read the content of a .yaml file.

    Parameters
    ----------
    file : path-like
        Path to the file. If it has no extension, .yaml is appended.
    kwargs : dict
        Accepted for consistency with the other loaders, not used.

    Returns
    -------
    object
        Content of the file, as parsed by yaml.safe_load
    """
    # Plain string path, such that the extension can be appended
    file = os.fspath(file)
    if os.path.splitext(file)[1] == "":
        file = file + ".yaml"
    # Explicit encoding: the platform default is not always UTF-8
    with open(file, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
70
+
71
def load_npy(file, **kwargs):
    """
    Read a .npy file using numpy.

    Parameters
    ----------
    file : path-like
        Path to the file. If it has no extension, .npy is appended.
    kwargs : dict
        Additional parameters passed to numpy.load

    Returns
    -------
    Output of numpy.load
    """
    # Plain string path, such that the extension can be appended
    file = os.fspath(file)
    if os.path.splitext(file)[1] == "":
        file = file + ".npy"
    return np.load(file, **kwargs)
75
+
76
def load_csv(file, pandas=False, **kwargs):
    """
    Read a .csv file using pandas or numpy.

    If pandas, the file is read using pandas,
    such that labels are automatically extracted.
    Otherwise, the file is read using numpy and labels are loaded from another file.

    Parameters
    ----------
    file : path-like
        Path to the file. If it has no extension, .csv is appended.
    pandas : bool, optional
        Whether to read the file with pandas, by default False
    kwargs : dict
        Additional parameters passed to pandas.read_csv or numpy.loadtxt.
        With numpy, the delimiter defaults to ",".

    Returns
    -------
    pandas.DataFrame or numpy.ndarray
        Content of the file
    """
    # Plain string path, such that the extension can be appended
    file = os.fspath(file)
    if os.path.splitext(file)[1] == "":
        file = file + ".csv"
    if pandas:
        # Remove header if not provided
        # This is to avoid issues with the label autodetection
        kwargs.setdefault("header", None)
        return pd.read_csv(file, **kwargs)
    # Pop the delimiter such that it is not passed twice to numpy
    delimiter = kwargs.pop("delimiter", ",")
    return np.loadtxt(file, delimiter=delimiter, **kwargs)
94
+
95
def load_txt(file, **kwargs):
    """
    Read a .txt file using numpy.

    The file is first read with the provided parameters.
    If this raises a ValueError, it is read again as strings
    and returned as a list.

    Parameters
    ----------
    file : path-like
        Path to the file. If it has no extension, .txt is appended.
    kwargs : dict
        Additional parameters passed to numpy.loadtxt.
        The delimiter defaults to a tabulation.

    Returns
    -------
    numpy.ndarray or list
        Array if the first reading succeeds, list of strings otherwise
    """
    # Plain string path, such that the extension can be appended
    file = os.fspath(file)
    if os.path.splitext(file)[1] == "":
        file = file + ".txt"
    # Pop the delimiter such that it is not passed twice to numpy
    delimiter = kwargs.pop("delimiter", "\t")
    try:
        return np.loadtxt(file, delimiter=delimiter, **kwargs)
    except ValueError:
        # If the basic loading fails, it's probably a label file
        # Any user-defined dtype is replaced to avoid a duplicate argument
        kwargs["dtype"] = str
        return np.loadtxt(file, delimiter=delimiter, **kwargs).tolist()
104
+
105
def load_xlsx(file, **kwargs):
    """
    Read a .xlsx file using pandas.

    Parameters
    ----------
    file : path-like
        Path to the file. If it has no extension, .xlsx is appended.
    kwargs : dict
        Additional parameters passed to pandas.read_excel.
        The header defaults to None.

    Returns
    -------
    Output of pandas.read_excel
    """
    # Plain string path, such that the extension can be appended
    file = os.fspath(file)
    if os.path.splitext(file)[1] == "":
        file = file + ".xlsx"
    # Remove header if not provided
    # This is to avoid issues with the label autodetection
    kwargs.setdefault("header", None)
    return pd.read_excel(file, **kwargs)
@@ -0,0 +1,102 @@
1
+ """Routines for loading .xlsx and .csv files through pandas"""
2
+
3
+ from mrio_toolbox.utils.loaders._np_loader import load_file
4
+ from mrio_toolbox.utils.loaders._parameter_loader import Parameter_Loader
5
+ from mrio_toolbox.utils import converters
6
+ import os
7
+ import logging
8
+
9
+ log = logging.getLogger(__name__)
10
+
11
class Pandas_Loader(Parameter_Loader):
    """
    Class for loading MRIO data through Pandas.
    """

    def __init__(
            self,
            **kwargs
            ):
        """
        Loader for MRIO data through pandas.

        Used for loading data from .xlsx and .csv files.

        Parameters
        ----------
        loader_kwargs : dict, optional
            Parameters passed to the underlying loader.
                - .xlsx: pandas.read_excel
                - .csv: pandas.read_csv
        groupings : dict, optional
            Aggregation on labels
        labels : dict, optional
            Explicit dictionary of labels.
        dimensions : list of int, optional
            List of label names.
        path : str, optional
            Path to the data
            The following paths are recognized:
                - path
                - mrio_path
                - file
                - data_path
                - table/year/version
        labels_path : str, optional
            Path to the labels files
        parts : dict, optional
            Parts to load, with specific settings
        **kwargs : dict
            Metadata for the MRIO data.
            MRIO metadata are passed to associated parts.

        """
        super().__init__(**kwargs)

    def load_part(
            self,
            **kwargs
            ):
        """
        Load a Part from explicit parameters.

        Parameters provided as arguments overload the corresponding instance attributes.

        Parameters
        ----------
        name : str, optional
            Name of the part.
            By default, the file name without its extension.
        loader_kwargs : dict, optional
            Parameters for the pandas reader.
            By default, the loader_kwargs of the instance.
            If it holds index_col or header, label autodetection is disabled.
        **kwargs : dict
            Passed to the attribute update, to get_file
            and to the pandas part converter.

        Returns
        -------
        dict
            Data for creating the Part object

        Raises
        ------
        FileNotFoundError
            If no file nor name argument is provided
        """
        # Update loader parameters
        self.update_attributes(**kwargs)

        file = self.get_file(**kwargs)

        loader_kwargs = kwargs.pop("loader_kwargs", self.loader_kwargs)

        name = kwargs.pop("name", os.path.splitext(os.path.basename(file))[0])

        log.info(f"Load part {name} from {file}")

        if name in self.part_settings:
            # Load preset settings
            # NOTE(review): presets take precedence over explicit arguments
            # here - confirm this is the intended priority.
            kwargs.update(self.part_settings[name])

        # If labels are explicitly provided, do not autodetect
        autodetect_labels = not any(
            key in loader_kwargs for key in ["index_col", "header"]
        )

        # extension and pandas are set explicitly below:
        # drop them from the reader parameters to avoid duplicate arguments
        reader_kwargs = {
            key: value for key, value in loader_kwargs.items()
            if key not in ("extension", "pandas")
        }
        table = load_file(
            file,
            extension=self.extension,
            pandas=True,
            **reader_kwargs
        )

        return converters.pandas.make_part(
            table,
            name=name,
            label_detection=autodetect_labels,
            **kwargs
        )