mrio-toolbox 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mrio-toolbox might be problematic. Click here for more details.

@@ -0,0 +1,341 @@
1
+ """
2
+ Routine for loading MRIO tables from explicit parameters
3
+ """
4
+
5
+ import os
6
+ from mrio_toolbox.utils.loaders._loader import Loader
7
+ from mrio_toolbox.utils.loaders._np_loader import load_file
8
+ import pathlib
9
+ import logging
10
+ from pathlib import Path
11
+
12
+ log = logging.getLogger(__name__)
13
+
14
class Parameter_Loader(Loader):
    """
    Class for loading MRIO data from explicit parameters.

    Paths, labels, groupings and per-part settings are collected when the
    loader is created; individual parts are then read with ``load_part``.
    ``extract_basic_info``, ``update_attributes`` and ``check_instructions``
    are not defined in this class; they are presumably inherited from
    ``Loader`` (TODO confirm).
    """

    def __init__(self, **kwargs):
        """
        Loader for MRIO data in non-netCDF formats.

        Parameters
        ----------
        loader_kwargs : dict, optional
            Parameters passed to the underlying loader.
            - .npy: numpy.load
            - .csv, .txt: numpy.loadtxt
        groupings : dict, optional
            Aggregation on labels.
            Only used if no ``groupings.yaml`` file is found in the path.
        labels : dict, optional
            Explicit dictionary of labels.
            Only used if no ``labels.yaml`` file is found in the path.
        dimensions : list of str or int, optional
            Names of the labels to load (or axis lengths).
            Used as label source when ``labels`` is not provided.
        path : str, optional
            Path to the data
            The following paths are recognized:
            - path
            - mrio_path
            - file
            - data_path
            - table/year/version
        labels_path : str, optional
            Path to the labels files
        extension : str, optional
            Extension of the data files.
        parts : dict, optional
            Parts to load, with specific settings
        **kwargs : dict
            Metadata for the MRIO data.
            MRIO metadata are passed to associated parts.
        """
        self.extract_basic_info(**kwargs)
        self.extract_path(update=True, **kwargs)
        self.labels = dict()

        try:
            log.debug("Try bulk labels loading.")
            self.labels = load_file(
                os.path.join(self.metadata["path"], "labels.yaml")
            )
        except FileNotFoundError:
            # No bulk label file: fall back on the explicit arguments
            log.debug("No labels found in the path.")
            labels = kwargs.pop("labels", None)
            if labels is None:
                self.metadata["dimensions"] = kwargs.get("dimensions", None)
                labels = self.metadata["dimensions"]
            self.labels = dict()
            self.format_labels(labels)

        try:
            self.groupings = load_file(
                os.path.join(self.metadata["path"], "groupings.yaml")
            )
        except FileNotFoundError:
            self.groupings = kwargs.pop("groupings", dict())

        self.extension = kwargs.get("extension", None)
        self.part_settings = kwargs.get("parts", dict())
        super().__init__()

    def available_parts(self, extension=None):
        """
        List the available parts in the current path.

        Parameters
        ----------
        extension : str, optional
            Extension of the files to look for.
            If not provided, the extension of the loader is used.
            If the loader has no extension either, all files are listed.

        Returns
        -------
        list
            List of available parts.
            File names are returned as is when no extension is known,
            otherwise the names are stripped from their extension.
        """
        if extension is None:
            extension = self.extension

        files = os.listdir(self.path)
        if extension is None:
            return files
        return [Path(file).stem for file in files if file.endswith(extension)]

    def extract_path(self, update=False, **kwargs):
        """
        Extract the path from the kwargs.

        Valid formats are:
            - path
            - mrio_path
            - file
            - data_path
            - table/year/version
        In absence of explicit path, the current directory is used.

        Parameters
        ----------
        update : bool, optional
            Whether the ``data_path`` and ``file`` arguments are allowed
            to set the path attribute.
            ``path`` and ``mrio_path`` are always applied.
            If none of them applies, a path that is already set is kept.
        """
        if "path" in kwargs:
            self.path = kwargs.pop("path")
        elif "mrio_path" in kwargs:
            self.path = kwargs.pop("mrio_path")
        elif "data_path" in kwargs and update:
            self.path = kwargs.pop("data_path")
        elif "file" in kwargs and update:
            self.path = pathlib.Path(kwargs.pop("file")).parent
        elif "path" not in self.__dict__:
            # Default to the current directory only when no path was ever
            # set, so that an existing path is never silently discarded
            # and self.path is always defined afterwards.
            log.debug("No path provided.")
            self.path = "."

        if "table" in kwargs and "year" in kwargs and "version" in kwargs:
            self.path = os.path.join(
                self.path,
                kwargs.pop("table"),
                str(kwargs.pop("year")),
                kwargs.pop("version"),
            )

        # Explicit argument > previously stored value > data path
        self.labels_path = kwargs.get(
            "labels_path",
            self.__dict__.get("labels_path", self.path),
        )

        #Store paths in metadata
        self.metadata["path"] = self.path
        self.metadata["labels_path"] = self.labels_path

    def format_labels(self, labels):
        """
        Treat the label information

        If labels are provided as dict, they are kept as is.
        If labels are provided as string (or list of strings),
        they are loaded from the labels_path folder.
        Nested lists are explored recursively.
        Entries that are not label names (e.g. integer axis lengths)
        are skipped, as there is nothing to load for them.
        The labels are stored as a dict of lists.
        """
        if labels is None:
            log.debug("No labels provided.")
            return
        if isinstance(labels, dict):
            self.labels = labels
            return
        if isinstance(labels, str):
            labels = [labels]

        for label in labels:
            if isinstance(label, (list, tuple)):
                self.format_labels(label)
            elif not isinstance(label, str):
                # Axis lengths are resolved later, in _get_labels
                log.debug(f"Skip non-string label entry {label!r}")
            elif label not in self.labels:
                log.debug("Load labels: "+label)
                self.labels[label] = load_file(
                    os.path.join(self.labels_path, label), dtype=str
                )

    def load_mrio(self, **kwargs):
        """
        Load MRIO data from explicit parameters.

        If parameters are provided, they overload the corresponding instance attributes.
        """
        self.update_attributes(**kwargs)

    def get_file(self, **kwargs):
        """
        Get the file to load.

        Parameters
        ----------
        file : path-like, optional
            User-defined path to the file, by default None.
            Ignored if equal to the loading instructions.
        file_name : str, optional
            Name of the file, looked for in the loader path.
        name : str, optional
            Name of the part, looked for in the loader path.

        Returns
        -------
        path-like
            Path to the file to load from

        Raises
        ------
        ValueError
            If no file is provided nor currently loaded
        """
        #Check if new instructions are provided
        self.check_instructions(**kwargs)

        instructions = self.metadata.get("instructions", None)

        #Find file
        if "file" in kwargs and kwargs.get("file") != instructions:
            #Ignore the file argument if it is the same as the one in the instructions
            return kwargs["file"]
        if "file_name" in kwargs:
            return os.path.join(self.path, kwargs["file_name"])
        if "name" in kwargs:
            return os.path.join(self.path, kwargs["name"])
        # getattr: a loader without any file attribute must raise the
        # documented ValueError, not an AttributeError
        current_file = getattr(self, "file", None)
        if current_file is None:
            log.error("No file provided: please provide a full file or a file name.")
            raise ValueError("No file provided: please provide a full file or a file name.")
        return current_file

    def load_part(self, **kwargs):
        """
        Load a Part from explicit parameters.

        Parameters provided as arguments overload the corresponding instance attributes.

        Returns
        -------
        dict
            Data for creating the Part object, with the keys
            "data", "name", "metadata", "labels" and "groupings".

        Raises
        ------
        ValueError
            If no file nor name argument is provided
            and no file is currently loaded
        """
        #Update loader parameters
        self.update_attributes(**kwargs)

        file = self.get_file(**kwargs)

        loader_kwargs = kwargs.pop("loader_kwargs", self.loader_kwargs)
        name = kwargs.pop("name", os.path.splitext(os.path.basename(file))[0])

        log.info(f"Load part {name} from {file}")

        if name in self.part_settings:
            #Load preset settings
            kwargs.update(self.part_settings[name])

        data = load_file(file, extension=self.extension, **loader_kwargs)

        dimensions = kwargs.get(
            "dimensions",
            self.metadata.get("dimensions", data.shape),
        )
        if dimensions is None:
            dimensions = data.shape
        labels = [self._get_labels(dim) for dim in dimensions]

        # Copy the metadata: the part specific entries below must not leak
        # into the loader metadata nor into the other parts
        metadata = dict(self.metadata)
        metadata["path"] = self.path
        metadata["loader_kwargs"] = loader_kwargs

        return {
            "metadata": metadata,
            "data": data,
            "name": name,
            "labels": labels,
            "groupings": kwargs.get("groupings", self.groupings),
        }

    def _get_labels(self, l):
        """Find the labels fitting an axis with a given shape

        If no fitting label is found, data are labelled numerically

        Parameters
        ----------
        l : int, list or str
            Length of the data dimension or name of the dimensions.

        Returns
        -------
        dict
            Labels of the axis, indexed by label name.

        Raises
        ------
        TypeError
            If l is neither a list, a str nor an int.
        """
        if isinstance(l, list):
            output = dict()
            try:
                for label in l:
                    output.update(self._get_labels(label))
                return output
            except IndexError:
                return {"0": l}
        if isinstance(l, str):
            if l not in self.labels:
                self.format_labels(l)
            return {l: self.labels[l]}
        if not isinstance(l, int):
            log.error(f"Invalid dimension type {type(l)}")
            raise TypeError(f"Invalid dimension type {type(l)}")
        if l == 1:
            # Key and value types differ from the other branches;
            # kept unchanged as callers may rely on this exact value
            return {0: "all"}
        log.debug("Try to infer label from axis of length "+str(l))
        for label in self.labels:
            #Look whether a basic label fits the axis
            if l == len(self.labels[label]):
                log.debug(f"Label {label} fits axis of length {l}")
                return {label: self.labels[label]}
        for grouping in self.groupings:
            #Look whether a grouped label fits the axis
            if l == len(self.groupings[grouping]):
                log.debug(f"Label {grouping} fits axis of length {l}")
                # list() on a mapping yields its keys, i.e. the group names
                return {grouping: list(self.groupings[grouping])}
        log.warning("No label found for axis of length "+str(l))
        return {"0": [i for i in range(l)]}
@@ -0,0 +1,8 @@
1
+ from mrio_toolbox.utils.savers._to_folder import save_mrio_to_folder, save_part_to_folder
2
+ from mrio_toolbox.utils.savers._to_nc import save_to_nc
3
+
4
# Names exported by the savers package
__all__ = ["save_mrio_to_folder", "save_part_to_folder", "save_to_nc"]
@@ -0,0 +1,19 @@
1
+ import os
2
+
3
def check_path(path):
    """
    Extend the name path to avoid overwriting existing files.

    Parameters
    ----------
    path : str
        Path currently selected

    Returns
    -------
    str
        The path itself if nothing exists at this location,
        otherwise the first non-existing path among
        path_0, path_1, path_2...
    """
    if not os.path.exists(path):
        return path
    counter = 0
    while os.path.exists(f"{path}_{counter}"):
        counter += 1
    return f"{path}_{counter}"
@@ -0,0 +1,160 @@
1
+ import os
2
+ from mrio_toolbox.utils.savers._path_checker import check_path
3
+ import numpy as np
4
+ import yaml
5
+ import pandas as pd
6
+
7
+ import logging
8
+
9
+ log = logging.getLogger(__name__)
10
+
11
def save_mrio_to_folder(obj,
                        path,
                        name=None,
                        extension=".npy",
                        overwrite=False,
                        **kwargs):
    """
    Save an MRIO instance in a folder

    The parts are saved in the folder, together with a labels.yaml file.
    The loading instructions are written next to the folder,
    in a file named after the folder with a .yaml extension.

    Parameters
    ----------
    obj : MRIO
        MRIO instance to save.
        Its metadata, parts and labels attributes are used.
    path : str
        Path to the folder to save the MRIO instance into.
    name : str, optional
        Name of the MRIO instance, used in the log only.
        By default, the name of the folder followed by ".yaml".
    extension : str, optional
        Extension of the files to save the MRIO instance into.
        The default is ".npy".
    overwrite : bool, optional
        Whether to overwrite the existing files. The default is False.
        If False and the folder already exists, the folder name is
        iterated until a non-existing name is found.
    kwargs : dict
        Additional arguments to pass to the saver.
    """
    path = os.fspath(path)
    if os.path.isdir(path) and not overwrite:
        # Save into the versioned folder: the existing one stays untouched
        path = check_path(path)
    os.makedirs(path, exist_ok=True)
    if name is None:
        name = os.path.basename(path)+".yaml"
    log.info(f"Saving MRIO instance {name} to folder {path}")

    loading_instructions = dict(obj.metadata)
    loading_instructions["path"] = path
    parts_instructions = dict()
    for part in obj.parts:
        save_part_to_folder(
            obj.parts[part],
            path,
            extension=extension,
            # The target folder is already resolved at this point:
            # the parts must all be written into it.
            overwrite=True,
            include_labels=False,
            write_instructions=False,
            **kwargs
        )
        #Save part metadata
        parts_instructions[part] = {
            "dimensions": obj.parts[part].get_dimensions(),
            "metadata": obj.parts[part].metadata,
        }
    write_labels(path, obj.labels)
    loading_instructions["parts"] = parts_instructions
    loading_instructions["extension"] = extension
    with open(path+".yaml", "w") as file:
        yaml.dump(loading_instructions, file)
64
+
65
def write_labels(path,labels):
    """
    Save labels in a folder

    The labels are dumped in a file named labels.yaml.
    An existing labels.yaml file is replaced.

    Parameters
    ----------
    path : str
        Path to the folder to save the labels into.
    labels : dict
        Labels to save.
    """
    labels_file = os.path.join(path, "labels.yaml")
    with open(labels_file, "w") as stream:
        yaml.dump(labels, stream)
85
+
86
def save_part_to_folder(obj,
                        path,
                        name=None,
                        extension=".npy",
                        save_labels=True,
                        write_instructions=True,
                        overwrite=False,
                        include_labels=True,
                        **kwargs):
    """
    Save a Part instance in a folder

    Parameters
    ----------
    obj : Part
        Part instance to save
    path : str
        Path to the folder to save the Part instance into.
        The folder is created if it does not exist.
    name : str, optional
        Name of the file, without extension.
        By default, the name of the Part.
    extension : str, optional
        Extension of the files to save the Part instance into.
        The default is ".npy".
        Supported extensions are .npy, .csv, .txt and .xlsx.
    save_labels : bool, optional
        Whether to save the labels. The default is True.
    write_instructions : bool, optional
        Whether to save the instructions. The default is True.
    overwrite : bool, optional
        Whether to overwrite the existing files. The default is False.
        If False, the file name is iterated until a non-existing
        file name is found.
    include_labels: bool, optional
        Whether to include the labels in the file. The default is True.
        This is only relevant for .csv and .xlsx files.
        If False, the labels are saved in a separate file.
    kwargs : dict
        Additional arguments to pass to the saver.

    Raises
    ------
    NotImplementedError
        If the extension is not supported.
    """
    if name is None:
        name = obj.name
    os.makedirs(path, exist_ok=True)
    if not overwrite:
        # Iterate the file name (name_0, name_1...) until it is free,
        # so that an existing data file is never replaced.
        base_name, version = name, 0
        while os.path.exists(os.path.join(path, name+extension)):
            name = f"{base_name}_{version}"
            version += 1
    log.info(f"Saving Part instance {name} to folder {path} with extension {extension}")
    data_file = os.path.join(path, name+extension)

    if save_labels:
        write_labels(path, obj.labels)
    parts_instructions = dict()
    if write_instructions:
        parts_instructions["dimensions"] = obj.get_dimensions()
        parts_instructions["metadata"] = obj.metadata

    if extension == ".npy":
        np.save(data_file, obj.data, **kwargs)
    elif extension == ".csv":
        delimiter = kwargs.pop("delimiter", ",")
        if include_labels:
            # pandas names the delimiter "sep"
            obj.to_pandas().to_csv(data_file, sep=delimiter, **kwargs)
            # NOTE(review): "dims" is read on the first axis and
            # "dimensions" on the second one; confirm both attributes
            # exist on the axes objects.
            parts_instructions["index_col"] = [i for i in range(len(obj.axes[0].dims))]
            if len(obj.axes) > 1:
                parts_instructions["header"] = [i for i in range(len(obj.axes[1].dimensions))]
        else:
            np.savetxt(data_file, obj.data, delimiter=delimiter, **kwargs)
    elif extension == ".txt":
        delimiter = kwargs.pop("delimiter", "\t")
        np.savetxt(data_file, obj.data, delimiter=delimiter, **kwargs)
    elif extension == ".xlsx":
        obj.to_pandas().to_excel(data_file, **kwargs)
    else:
        raise NotImplementedError(f"Extension {extension} not supported")

    if write_instructions:
        parts_instructions["file"] = data_file
        with open(os.path.join(path, name+".yaml"), "w") as file:
            yaml.dump(parts_instructions, file)
@@ -0,0 +1,52 @@
1
+ import os
2
+ import yaml
3
+ import pandas as pd
4
+ from mrio_toolbox.utils.savers._path_checker import check_path
5
+ import logging
6
+
7
+ log = logging.getLogger(__name__)
8
+
9
def save_to_nc(obj,path,overwrite=False,write_instructions=False,**kwargs):
    """
    Save an MRIO or Part instance in a .nc file

    Parameters
    ----------
    obj : MRIO or Part
        Instance to save. It must provide a to_xarray method.
    path : str
        Path to the .nc file to save the instance into.
        The extension of the path, if any, is replaced by ".nc".
    overwrite : bool, optional
        Whether to overwrite an existing file. The default is False.
        If False, the file name is iterated (name_0, name_1...)
        until a non-existing file name is found.
    write_instructions : bool, optional
        Whether to write a .yaml file pointing to the saved file.
        The default is False.
    **kwargs : dict
        Additional arguments to pass to the saver.
    """
    log.info(f"Saving {obj.__class__.__name__} instance to {path}")
    ds = obj.to_xarray()
    for index in ds.indexes:
        if isinstance(ds.indexes[index],pd.MultiIndex):
            # Imported on demand: only required for MultiIndex data
            import cf_xarray as cfxr
            #Compress MultiIndex data as it is not supported by xarray
            ds = cfxr.encode_multi_index_as_compress(ds,index)

    #Remove dict attrs (not supported for serialization)
    for attr in list(ds.attrs.keys()):
        if isinstance(ds.attrs[attr],dict):
            log.warning(f"Attribute {attr} is a dict. It will not be saved.")
            ds.attrs.pop(attr)

    for var in ds.data_vars:
        for attr in list(ds[var].attrs.keys()):
            if isinstance(ds[var].attrs[attr],dict):
                log.warning(f"Attribute {attr} of {var} is a dict. It will not be saved.")
                ds[var].attrs.pop(attr)

    # Always strip the extension, otherwise "file.nc" would be saved as
    # "file.nc.nc" when overwriting
    base_path = os.path.splitext(os.fspath(path))[0]
    if not overwrite:
        # Look for the actual .nc file: the extension-less base path does
        # not tell whether a previous save exists
        candidate, version = base_path, 0
        while os.path.exists(candidate+".nc"):
            candidate = f"{base_path}_{version}"
            version += 1
        base_path = candidate

    ds.to_netcdf(base_path+".nc",**kwargs)
    if write_instructions:
        instructions = {
            "file": base_path+".nc"
        }
        with open(base_path+".yaml","w") as file:
            yaml.dump(instructions,file)