mrio-toolbox 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mrio-toolbox might be problematic. Click here for more details.

Files changed (61)
  1. {mrio_toolbox-1.1.1.dist-info → mrio_toolbox-1.1.3.dist-info}/METADATA +2 -2
  2. mrio_toolbox-1.1.3.dist-info/RECORD +5 -0
  3. mrio_toolbox-1.1.3.dist-info/top_level.txt +1 -0
  4. mrio_toolbox/__init__.py +0 -21
  5. mrio_toolbox/_parts/_Axe.py +0 -539
  6. mrio_toolbox/_parts/_Part.py +0 -1698
  7. mrio_toolbox/_parts/__init__.py +0 -7
  8. mrio_toolbox/_parts/part_operations.py +0 -57
  9. mrio_toolbox/extractors/__init__.py +0 -20
  10. mrio_toolbox/extractors/downloaders.py +0 -36
  11. mrio_toolbox/extractors/emerging/__init__.py +0 -3
  12. mrio_toolbox/extractors/emerging/emerging_extractor.py +0 -117
  13. mrio_toolbox/extractors/eora/__init__.py +0 -3
  14. mrio_toolbox/extractors/eora/eora_extractor.py +0 -132
  15. mrio_toolbox/extractors/exiobase/__init__.py +0 -3
  16. mrio_toolbox/extractors/exiobase/exiobase_extractor.py +0 -270
  17. mrio_toolbox/extractors/extractors.py +0 -79
  18. mrio_toolbox/extractors/figaro/__init__.py +0 -3
  19. mrio_toolbox/extractors/figaro/figaro_downloader.py +0 -280
  20. mrio_toolbox/extractors/figaro/figaro_extractor.py +0 -187
  21. mrio_toolbox/extractors/gloria/__init__.py +0 -3
  22. mrio_toolbox/extractors/gloria/gloria_extractor.py +0 -202
  23. mrio_toolbox/extractors/gtap11/__init__.py +0 -7
  24. mrio_toolbox/extractors/gtap11/extraction/__init__.py +0 -3
  25. mrio_toolbox/extractors/gtap11/extraction/extractor.py +0 -129
  26. mrio_toolbox/extractors/gtap11/extraction/harpy_files/__init__.py +0 -6
  27. mrio_toolbox/extractors/gtap11/extraction/harpy_files/_header_sets.py +0 -279
  28. mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file.py +0 -262
  29. mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file_io.py +0 -974
  30. mrio_toolbox/extractors/gtap11/extraction/harpy_files/header_array.py +0 -300
  31. mrio_toolbox/extractors/gtap11/extraction/harpy_files/sl4.py +0 -229
  32. mrio_toolbox/extractors/gtap11/gtap_mrio/__init__.py +0 -6
  33. mrio_toolbox/extractors/gtap11/gtap_mrio/mrio_builder.py +0 -158
  34. mrio_toolbox/extractors/icio/__init__.py +0 -3
  35. mrio_toolbox/extractors/icio/icio_extractor.py +0 -121
  36. mrio_toolbox/extractors/wiod/__init__.py +0 -3
  37. mrio_toolbox/extractors/wiod/wiod_extractor.py +0 -143
  38. mrio_toolbox/mrio.py +0 -899
  39. mrio_toolbox/msm/__init__.py +0 -6
  40. mrio_toolbox/msm/multi_scale_mapping.py +0 -863
  41. mrio_toolbox/utils/__init__.py +0 -3
  42. mrio_toolbox/utils/converters/__init__.py +0 -5
  43. mrio_toolbox/utils/converters/pandas.py +0 -247
  44. mrio_toolbox/utils/converters/xarray.py +0 -130
  45. mrio_toolbox/utils/formatting/__init__.py +0 -0
  46. mrio_toolbox/utils/formatting/formatter.py +0 -528
  47. mrio_toolbox/utils/loaders/__init__.py +0 -7
  48. mrio_toolbox/utils/loaders/_loader.py +0 -312
  49. mrio_toolbox/utils/loaders/_loader_factory.py +0 -96
  50. mrio_toolbox/utils/loaders/_nc_loader.py +0 -184
  51. mrio_toolbox/utils/loaders/_np_loader.py +0 -112
  52. mrio_toolbox/utils/loaders/_pandas_loader.py +0 -128
  53. mrio_toolbox/utils/loaders/_parameter_loader.py +0 -386
  54. mrio_toolbox/utils/savers/__init__.py +0 -11
  55. mrio_toolbox/utils/savers/_path_checker.py +0 -37
  56. mrio_toolbox/utils/savers/_to_folder.py +0 -165
  57. mrio_toolbox/utils/savers/_to_nc.py +0 -60
  58. mrio_toolbox-1.1.1.dist-info/RECORD +0 -59
  59. mrio_toolbox-1.1.1.dist-info/top_level.txt +0 -1
  60. {mrio_toolbox-1.1.1.dist-info → mrio_toolbox-1.1.3.dist-info}/WHEEL +0 -0
  61. {mrio_toolbox-1.1.1.dist-info → mrio_toolbox-1.1.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,312 +0,0 @@
1
- """
2
- Central loading module for the mrio_toolbox package.
3
-
4
- This module contains the central loading function for the mrio_toolbox package.
5
- Depending on the loading mode, the function will call the appropriate loader.
6
- """
7
-
8
- import os
9
- import logging
10
- import yaml
11
-
12
- log = logging.Logger(__name__)
13
-
14
class Loader:
    """
    Parent class for loaders in the MRIO toolbox.

    The `Loader` class provides a base implementation for loading MRIO data.
    It includes methods for extracting metadata, updating settings, and managing
    groupings and labels. Specific loaders can inherit from this class to
    implement format-specific loading functionality.

    Instance variables
    ------------------
    metadata : dict
        Metadata associated with the loader.
    labels : dict
        Labels for the axes of the MRIO data.
    groupings : dict
        Groupings for the labels, defining higher-level aggregations.
    file : str or None
        Path to the file being loaded.
    loader_kwargs : dict
        Additional parameters for the loader.

    Notes
    -----
    This class is intended to be used as a base class for specific loaders.
    It provides general functionality for managing metadata, labels, and
    groupings, but does not implement actual data loading.
    """

    def __init__(self):
        """
        Initialize a Loader object.

        Notes
        -----
        Loaders are created with format-specific parameters. They hold metadata
        and methods to load MRIO data.
        A loader is created using the base class if no specific loader is
        required, i.e., if the data is directly loaded from dict, pandas or
        xarray. In that case, the loader will fail when used,
        triggering the creation of a specific loader.
        """
        self.load_mrio()

    def extract_basic_info(self, **kwargs):
        """
        Extract basic information from the loader parameters.

        The function extracts the path, labels and groupings from the
        keyword arguments; everything left over is stored as metadata.
        """
        self.loader_kwargs = kwargs.pop("loader_kwargs", dict())
        self.file = kwargs.get("file", None)
        self.groupings = kwargs.get("groupings", dict())
        self.labels = kwargs.get("labels", dict())
        # Remaining kwargs are metadata.
        # NOTE: file/groupings/labels are read with get(), not pop(), so they
        # deliberately remain visible in the metadata as well.
        self.metadata = kwargs
        if isinstance(self.groupings, str):
            # A string is interpreted as the name of a groupings preset file
            self.groupings = self.load_groupings(self.groupings)

    def update_settings(self, **settings):
        """
        Update the loader settings with new parameters.

        loader_kwargs, groupings, labels and metadata entries are merged into
        the current dictionaries; any other keyword ends up in metadata.
        """
        self.loader_kwargs.update(
            settings.pop("loader_kwargs", dict())
        )
        self.groupings.update(
            settings.pop("groupings", dict())
        )
        self.labels.update(
            settings.pop("labels", dict())
        )
        self.metadata.update(
            settings.pop("metadata", dict())
        )
        self.metadata.update(settings)

    def load_mrio(self):
        """
        Create an MRIO container based on the current parameters.

        The base implementation only resets the loader state; subclasses
        override this method to actually read data.
        """
        self.metadata = dict()
        self.labels = dict()
        self.groupings = dict()
        self.file = None

    def load_part(self, **kwargs):
        """
        Load an MRIO Part based on new or existing parameters.

        Raises
        ------
        FileNotFoundError
            Always, in the base class: a format-specific loader is required,
            which triggers the creation of one by the caller.
        """
        raise FileNotFoundError("No proper loader was initialised.\n"+\
            "The loader needs to be reloaded with new instructions.")

    def set_groupings(self, groupings):
        """
        Update the groupings attribute of the loader.

        Parameters
        ----------
        groupings : dict of dict of str
            Aggregation on labels
        """
        self.groupings = groupings

    def update_attributes(self, **kwargs):
        """
        Update the current attributes of the loader.

        The function updates the groupings, paths, labels and metadata
        attributes.

        Notes
        -----
        Relies on ``extract_path`` and ``format_labels``, which are not
        defined on this base class — presumably provided by subclasses;
        TODO(review): confirm.
        """
        if "groupings" in kwargs:
            log.debug("Update groupings")
            self.groupings = kwargs.pop("groupings", self.groupings)

        self.extract_path(update=True, **kwargs)

        if "labels" in kwargs:
            log.debug("Update labels")
            self.format_labels(kwargs.pop("labels"))

        for kwarg in kwargs:
            log.debug(f"Override parameter {kwarg} with explicit parameter {kwargs[kwarg]}")
            self.metadata[kwarg] = kwargs[kwarg]

    def load_groupings(self,
                       file,
                       dimension=None,
                       path=None):
        """Load groupings from a file or an explicit mapping.

        Parameters
        ----------
        file : str or dict
            Name of the preset file to load, or an explicit groupings mapping.
        dimension : str, optional
            Name of the dimension to load groupings for.
            By default (None), the file is interpreted as a preset
            of groupings on different dimensions.
            NOTE(review): only the dimension=None path is implemented;
            with an explicit dimension the method currently returns None.
        path : path-like, optional
            Path where the file is stored.
            By default, the groupings are read from parameters/groupings
            in the working dir.
        """
        def _check_groupings(groupings, dimension):
            """Drop items absent from the labels, then drop empty groups."""
            # Build a new dict instead of mutating in place: the previous
            # implementation removed list items while iterating the list
            # (silently skipping the next item) and popped dict keys while
            # iterating .keys() (RuntimeError: dict changed size).
            checked = dict()
            for key, items in groupings.items():
                kept = []
                for item in items:
                    if item in self.labels[dimension]:
                        kept.append(item)
                    else:
                        log.warning(
                            f"Item {item} not found in {dimension} labels"
                        )
                if len(kept) == 0:
                    log.warning(f"Group {key} is empty")
                else:
                    checked[key] = kept
            return checked

        def load_grouping(file, level, path):
            """Load a single grouping file (one member per line)."""
            path = os.path.join(path, level)
            with open(os.path.join(path, file+'.txt')) as f:
                group = f.read().splitlines()
            return {file: group}

        if path is None:
            path = os.path.join("parameters", "groupings")

        #If no dimension is specified, interpret as a preset
        output = dict()
        if isinstance(file, str):
            log.info("Load groupings set from "+path+file)
            with open(os.path.join(path, file)) as f:
                groupings = yaml.safe_load(f)
        elif isinstance(file, dict):
            groupings = file
            output = self.groupings
        else:
            # Previously fell through to a NameError on 'groupings'
            raise TypeError(
                f"Unsupported groupings specification: {type(file).__name__}"
            )

        if dimension is None:
            dimensions = list(groupings.keys())
            output = dict()
            for level in dimensions:
                if isinstance(groupings[level], dict):
                    #Case the preset explicitly defines a grouping
                    groupings[level] = _check_groupings(
                        groupings[level], level
                    )
                    output[level] = groupings[level]
                    continue
                if isinstance(groupings[level], str):
                    groupings[level] = [groupings[level]]
                if isinstance(groupings[level], list):
                    #Otherwise, interpret as a list of groupings
                    output[level] = dict()
                    covered = []
                    for item in groupings[level]:
                        #Load all groupings
                        groups = load_grouping(
                            item, level, path
                        )
                        if any(group in covered for group in groups):
                            duplicate = [
                                group for group in groups if group in covered
                            ]
                            # join() the names: the original concatenated a
                            # list to a str, raising TypeError
                            log.warning("The following items are covered in "+\
                                "multiple groupings: " + ", ".join(duplicate))
                        covered += groups
                        output[level][item] = groups
            return output

    def set_labels(self, labels):
        """
        Update the labels attribute of the loader.

        Parameters
        ----------
        labels : dict of str:list of str
            Labels of the axes
        """
        self.labels = labels

    def available_parts(self, **kwargs):
        """
        Return the available parts in the MRIO data.

        The base class cannot enumerate parts; it only checks a file exists.
        """
        if self.file is None:
            raise FileNotFoundError("No file was provided.")

    def check_instructions(self, **kwargs):
        """
        Interpret the file argument for loading a part.

        This method solves the ambiguity between data files and optional
        .yaml instructions.
        If the file argument refers to an instruction file, it is compared
        to the current instructions.
        If the data file or instruction file differ from the ones currently
        loaded, an exception is raised to force a reload.

        Parameters
        ----------
        kwargs : additional arguments, may contain 'instructions'

        Raises
        ------
        FileNotFoundError
            If the loader needs to be reloaded with new instructions.
        """
        #The 'instructions' attribute is used to check if the loader needs to be reloaded
        #It contains the reference to the potential yaml file used to load the data
        new_instructions = kwargs.get("instructions", None)
        ref_instructions = self.metadata.get("instructions", None)
        if new_instructions is not None and ref_instructions != new_instructions:
            #If the instructions differ from the current ones,
            #trigger a reload of the loader
            log.error("The loader needs to be reloaded with new instructions.")
            raise FileNotFoundError("The loader needs to be reloaded with new instructions.")
@@ -1,96 +0,0 @@
1
- """
2
- Initialize the appropriate loader based on the provided parameters.
3
- """
4
- import os
5
- import yaml
6
- from mrio_toolbox.utils.loaders._nc_loader import NetCDF_Loader
7
- from mrio_toolbox.utils.loaders._parameter_loader import Parameter_Loader
8
- from mrio_toolbox.utils.loaders._pandas_loader import Pandas_Loader
9
- from mrio_toolbox.utils.loaders._loader import Loader
10
- import logging
11
-
12
- log = logging.getLogger(__name__)
13
-
14
def make_loader(**kwargs):
    """
    Initialize the appropriate loader based on the provided parameters.

    If a file or data_file is provided, the function will attempt to determine
    the appropriate loader based on the file extension.

    Namely:
    - .nc files are loaded using the NetCDF_Loader
    - .yaml files are interpreted as loading instructions
    - .xlsx files (and .csv with loader_kwargs {"pandas": True}) use the
      Pandas_Loader

    All other non-netCDF files are loaded using the Parameter_Loader.

    Raises
    ------
    ValueError
        If the file extension is missing or not supported.
    """
    file = kwargs.get("file", None)
    if file is not None:
        # Normalize the path to avoid issues with UNIX/Windows separators,
        # and propagate it so the chosen loader sees the normalized path too.
        file = os.path.abspath(file)
        kwargs["file"] = file
    extension = kwargs.get("extension", None)

    if extension is None:
        if file is None:
            log.info("No file or extension provided.")
            log.info("An empty loader will be created.")
            return Loader()
        extension = os.path.splitext(file)[1]

    # Single check covers both the inferred and the explicit extension
    # (the original duplicated this test in the inference branch).
    if extension == "":
        log.error("File extension missing.")
        raise ValueError("File extension missing.")
    if extension == ".nc":
        return NetCDF_Loader(**kwargs)
    if extension in [".yaml", ".yml"]:
        return load_from_yaml(**kwargs)
    if extension in [".npy", ".txt"]:
        return Parameter_Loader(**kwargs)
    if extension == ".csv":
        if "loader_kwargs" in kwargs:
            pandas = kwargs["loader_kwargs"].pop(
                "pandas", False
            )
            if pandas:
                return Pandas_Loader(**kwargs)
        return Parameter_Loader(**kwargs)
    if extension == ".xlsx":
        return Pandas_Loader(**kwargs)
    log.error(f"File extension {extension} not supported.")
    # Previously this fell through and silently returned None,
    # deferring the failure to an obscure crash at the call site.
    raise ValueError(f"File extension {extension} not supported.")
62
-
63
def load_from_yaml(**kwargs):
    """
    Create a loader based on yaml file instructions.

    The yaml file content is read first, then overridden by any explicit
    keyword argument, and the merged parameters are handed back to
    ``make_loader`` together with a reference to the instruction file.

    Parameters
    ----------
    file : path-like
        Full path to the .yaml file
    """
    instructions = kwargs.pop("file")
    log.info("Get loading instructions from: " + instructions)
    with open(instructions) as stream:
        parameters = yaml.safe_load(stream)

    # Explicit keyword arguments take precedence over the file contents
    for key, value in kwargs.items():
        log.debug(f"Override file parameter {key} with explicit parameter {value}")
        parameters[key] = value

    # Error handling
    if "path" not in parameters:
        if "file" not in parameters:
            log.info("No path provided, using current working directory instead")
            parameters["path"] = os.getcwd()
    elif not os.path.isdir(parameters["path"]):
        log.error("Provided path is not a directory")
        raise ValueError("Provided path is not a directory")

    return make_loader(instructions=instructions, **parameters)
92
-
93
-
94
-
95
-
96
-
@@ -1,184 +0,0 @@
1
- """
2
- Provides the NetCDF_Loader class for loading MRIO data from netCDF files.
3
- """
4
- from mrio_toolbox.utils.loaders._loader import Loader
5
- from mrio_toolbox.utils import converters
6
- import xarray as xr
7
-
8
- import logging
9
- import pandas as pd
10
-
11
- log = logging.getLogger(__name__)
12
-
13
class NetCDF_Loader(Loader):
    """
    Class for loading MRIO data from a netCDF file.

    The `NetCDF_Loader` class extends the base `Loader` class to provide
    functionality for loading MRIO data stored in netCDF format. It uses the
    xarray library to load the data and extract metadata, labels, and
    groupings.

    Instance variables
    ------------------
    data : xarray.Dataset
        The loaded netCDF data stored as an xarray Dataset.
    _available_parts : list
        List of available parts in the MRIO data.
    metadata : dict
        Metadata extracted from the netCDF file.
    labels : dict
        Labels for the axes of the MRIO data.
    groupings : dict
        Groupings for the labels, defining higher-level aggregations.
    file : str or None
        Path to the netCDF file currently loaded in memory.
    loader_kwargs : dict
        Additional parameters passed to the xarray loader.
    """

    def __init__(
            self,
            **kwargs
    ):
        """
        Initialize a NetCDF_Loader object.

        Parameters
        ----------
        loader_kwargs : dict, optional
            Parameters passed to the xarray loader.
        file : path-like
            Full path to the netCDF file.
        groupings : dict, optional
            Aggregation on labels
        **kwargs : dict
            Metadata for the MRIO data.
            MRIO metadata are passed to associated parts.
        """
        self.extract_basic_info(**kwargs)
        # Loader.__init__ calls load_mrio(), reading the file set above
        super().__init__()
        self.update_settings(**kwargs)

    def load_mrio(
            self,
            file=None,
            **kwargs
    ):
        """
        Load a netcdf file in the memory.

        This procedure is based on the xarray library.
        The xarray dataset is stored in the data attribute.
        The loader also extracts all metadata from the file.

        Parameters
        ----------
        file : path-like, optional
            Full path to the file.
            If left empty, the file currently initialised is used.

        Raises
        ------
        ValueError
            If the file is not provided.
        """
        if file is None:
            file = self.file

        if file is None:
            raise ValueError("No file provided.")

        log.info(f"Load MRIO data from {file}")
        self.data = xr.open_dataset(file, **self.loader_kwargs)
        mrio_data, list_of_parts = converters.xarray.make_mrio(self.data)
        self._available_parts = list_of_parts
        # Record which file is now in memory so get_file can detect real
        # changes (previously self.file was never updated, so every explicit
        # call with the same new file re-read the dataset).
        self.file = file
        self.update_settings(**mrio_data["data"])

    def load_part(
            self,
            file=None,
            **kwargs
    ):
        """
        Load a part of the MRIO table.

        Parameters
        ----------
        name : str
            Name of the variable to load
        file : path, optional
            Full path to the data.
            If left empty, the current xarray Dataset is used.

        Returns
        -------
        dict
            Data required to create a Part object
        """
        self.get_file(file, **kwargs)  # Update the file if needed
        return converters.xarray.make_part(
            self.data, **kwargs
        )

    def get_file(self, file=None, **kwargs):
        """
        Get the file to load, reloading the dataset when it changed.

        Parameters
        ----------
        file : path-like, optional
            User-defined path to the file, by default None

        Returns
        -------
        path-like
            Path to the file the data is loaded from

        Raises
        ------
        ValueError
            If no file is provided nor currently loaded
        """
        self.check_instructions(**kwargs)
        # Check if new instructions are provided

        if file is None and self.file is None:
            raise ValueError("No file provided.")

        instructions = self.metadata.get("instructions", None)

        if file is not None and file != self.file and file != instructions:
            # Only reload when a genuinely different data file is requested.
            # (file=None previously compared unequal to self.file and forced
            # a redundant re-read of the current dataset on every call.)
            self.load_mrio(file)

        # Return the resolved path, not None, when the current file is used
        return file if file is not None else self.file

    def available_parts(
            self, **kwargs
    ):
        """
        Return a list of available parts in the MRIO table.

        Returns
        -------
        list
            List of available parts
        """
        return self._available_parts
183
-
184
-
@@ -1,112 +0,0 @@
1
- """
2
- Routine for loading MRIO Parts from .npy and .csv files
3
- """
4
-
5
- import os
6
- import numpy as np
7
- import pandas as pd
8
- import logging
9
- import yaml
10
-
11
- log = logging.getLogger(__name__)
12
-
13
def load_file(file, extension=None, pandas=False, **kwargs):
    """
    Load data from a .npy, .txt, .csv, .xlsx or .yaml file.

    Parameters
    ----------
    file : path-like
        Full path to the file
    extension : str, optional
        Expected file extension (e.g. ".csv").
        By default, the extension is inferred from the file name;
        if the name has no extension, each supported loader is tried in turn.
    pandas : bool, optional
        If True, .csv files are read with pandas (labels auto-detected)
        instead of numpy. Default False.
    kwargs : dict
        Additional parameters for the loaders

    Returns
    -------
    data : np.array
        Numerical data

    Raises
    ------
    FileNotFoundError
        If the file is not found in the specified path
    ValueError
        If the file extension is not supported
    """
    if extension is None:
        extension = os.path.splitext(file)[1]
    elif os.path.splitext(file)[1] == "":
        file = file + extension
    elif os.path.splitext(file)[1] != extension:
        raise FileNotFoundError(f"File {file} does not match the provided extension {extension}.")
    if extension == "":
        # .xlsx is also tried below, so mention it in the messages
        log.info("No extension provided. Trying .npy, .csv, .txt and .xlsx.")
        for loader in [load_npy, load_csv, load_txt, load_xlsx]:
            try:
                return loader(file)
            except FileNotFoundError:
                pass
        log.error(f"File {file} not found with extensions .npy, .csv, .txt or .xlsx.")
        raise FileNotFoundError(f"File {file} not found in the specified path.")
    if extension not in [".npy", ".csv", ".txt", ".xlsx", ".yaml"]:
        log.error(f"File extension {extension} not supported.")
        # The previous message omitted .xlsx and .yaml from the supported list
        raise ValueError(f"File extension {extension} not supported.\nSupported extensions: .npy, .csv, .txt, .xlsx, .yaml")
    if extension == ".npy":
        return load_npy(file, **kwargs)
    if extension == ".csv":
        return load_csv(file, pandas=pandas, **kwargs)
    if extension == ".txt":
        return load_txt(file, **kwargs)
    if extension == ".xlsx":
        return load_xlsx(file, **kwargs)
    if extension == ".yaml":
        return load_yaml(file, **kwargs)
64
-
65
def load_yaml(file, **kwargs):
    """Read a .yaml file and return its parsed content."""
    # Append the default extension when the caller omitted it
    if not os.path.splitext(file)[1]:
        file = f"{file}.yaml"
    with open(file, "r") as stream:
        return yaml.safe_load(stream)
70
-
71
def load_npy(file, **kwargs):
    """Read a .npy file with numpy, appending the extension if missing."""
    if not os.path.splitext(file)[1]:
        file = f"{file}.npy"
    return np.load(file, **kwargs)
75
-
76
def load_csv(file, pandas=False, **kwargs):
    """
    Read a .csv file using pandas or numpy.

    If pandas, the file is read using pandas,
    such that labels are automatically extracted.
    Otherwise, the file is read using numpy and labels are loaded from
    another file.

    Parameters
    ----------
    file : path-like
        Full path to the file (".csv" appended if no extension).
    pandas : bool, optional
        Read with pandas instead of numpy. Default False.
    kwargs : dict
        Extra arguments for pd.read_csv / np.loadtxt
        (may include "delimiter", default ",").
    """
    if os.path.splitext(file)[1] == "":
        file = file + ".csv"
    # pop, not get: otherwise a caller-supplied delimiter stayed in kwargs
    # and np.loadtxt received it twice (TypeError: duplicate keyword).
    delimiter = kwargs.pop("delimiter", ",")
    if pandas:
        #Remove header if not provided
        #This is to avoid issues with the label autodetection
        kwargs["header"] = kwargs.get("header", None)
        return pd.read_csv(file,
                           delimiter=delimiter,
                           **kwargs)
    return np.loadtxt(file, delimiter=delimiter, **kwargs)
94
-
95
def load_txt(file, **kwargs):
    """
    Read a .txt file with numpy.

    Numeric content is returned as an array; if numeric parsing fails,
    the file is re-read as strings and returned as a list (label files).

    Parameters
    ----------
    file : path-like
        Full path to the file (".txt" appended if no extension).
    kwargs : dict
        Extra arguments for np.loadtxt
        (may include "delimiter", default tab).
    """
    if os.path.splitext(file)[1] == "":
        file = file + ".txt"
    # pop, not get: otherwise a caller-supplied delimiter stayed in kwargs
    # and np.loadtxt received it twice (TypeError: duplicate keyword).
    delimiter = kwargs.pop("delimiter", "\t")
    try:
        return np.loadtxt(file, delimiter=delimiter, **kwargs)
    except ValueError:
        #If the basic loading fails, it's probably a label file
        return np.loadtxt(file, dtype=str, delimiter=delimiter, **kwargs).tolist()
104
-
105
def load_xlsx(file, **kwargs):
    """Read a .xlsx file with pandas, appending the extension if missing."""
    if not os.path.splitext(file)[1]:
        file = f"{file}.xlsx"
    # Remove the header if not provided, to avoid issues with the
    # label autodetection
    kwargs["header"] = kwargs.get("header", None)
    return pd.read_excel(file, **kwargs)