mrio-toolbox 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mrio-toolbox might be problematic.
- mrio_toolbox/__init__.py +18 -2
- mrio_toolbox/_parts/_Axe.py +95 -37
- mrio_toolbox/_parts/_Part.py +264 -70
- mrio_toolbox/_parts/__init__.py +4 -0
- mrio_toolbox/_parts/part_operations.py +24 -17
- mrio_toolbox/extractors/__init__.py +20 -0
- mrio_toolbox/extractors/downloaders.py +36 -0
- mrio_toolbox/extractors/emerging/__init__.py +3 -0
- mrio_toolbox/extractors/emerging/emerging_extractor.py +117 -0
- mrio_toolbox/extractors/eora/__init__.py +3 -0
- mrio_toolbox/extractors/eora/eora_extractor.py +132 -0
- mrio_toolbox/extractors/exiobase/__init__.py +3 -0
- mrio_toolbox/extractors/exiobase/exiobase_extractor.py +270 -0
- mrio_toolbox/extractors/extractors.py +79 -0
- mrio_toolbox/extractors/figaro/__init__.py +3 -0
- mrio_toolbox/extractors/figaro/figaro_downloader.py +280 -0
- mrio_toolbox/extractors/figaro/figaro_extractor.py +187 -0
- mrio_toolbox/extractors/gloria/__init__.py +3 -0
- mrio_toolbox/extractors/gloria/gloria_extractor.py +202 -0
- mrio_toolbox/extractors/gtap11/__init__.py +7 -0
- mrio_toolbox/extractors/gtap11/extraction/__init__.py +3 -0
- mrio_toolbox/extractors/gtap11/extraction/extractor.py +129 -0
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/__init__.py +6 -0
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/_header_sets.py +279 -0
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file.py +262 -0
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file_io.py +974 -0
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/header_array.py +300 -0
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/sl4.py +229 -0
- mrio_toolbox/extractors/gtap11/gtap_mrio/__init__.py +6 -0
- mrio_toolbox/extractors/gtap11/gtap_mrio/mrio_builder.py +158 -0
- mrio_toolbox/extractors/icio/__init__.py +3 -0
- mrio_toolbox/extractors/icio/icio_extractor.py +121 -0
- mrio_toolbox/extractors/wiod/__init__.py +3 -0
- mrio_toolbox/extractors/wiod/wiod_extractor.py +143 -0
- mrio_toolbox/mrio.py +254 -94
- mrio_toolbox/msm/__init__.py +6 -0
- mrio_toolbox/msm/multi_scale_mapping.py +863 -0
- mrio_toolbox/utils/__init__.py +3 -0
- mrio_toolbox/utils/converters/__init__.py +3 -0
- mrio_toolbox/utils/converters/pandas.py +8 -6
- mrio_toolbox/utils/converters/xarray.py +2 -13
- mrio_toolbox/utils/formatting/__init__.py +0 -0
- mrio_toolbox/utils/formatting/formatter.py +528 -0
- mrio_toolbox/utils/loaders/__init__.py +4 -0
- mrio_toolbox/utils/loaders/_loader.py +60 -4
- mrio_toolbox/utils/loaders/_loader_factory.py +22 -1
- mrio_toolbox/utils/loaders/_nc_loader.py +37 -1
- mrio_toolbox/utils/loaders/_pandas_loader.py +29 -3
- mrio_toolbox/utils/loaders/_parameter_loader.py +61 -16
- mrio_toolbox/utils/savers/__init__.py +3 -0
- mrio_toolbox/utils/savers/_path_checker.py +25 -7
- mrio_toolbox/utils/savers/_to_folder.py +6 -1
- mrio_toolbox/utils/savers/_to_nc.py +26 -18
- {mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info}/METADATA +10 -6
- mrio_toolbox-1.1.1.dist-info/RECORD +59 -0
- {mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info}/WHEEL +1 -1
- mrio_toolbox-1.0.0.dist-info/RECORD +0 -26
- {mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info/licenses}/LICENSE +0 -0
- {mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info}/top_level.txt +0 -0
mrio_toolbox/extractors/gtap11/extraction/extractor.py (new file)
@@ -0,0 +1,129 @@
+"""
+Extracts the data from the .har file and saves it as .npy files.
+"""
+
+import os
+from mrio_toolbox.extractors.gtap11.extraction.harpy_files.har_file import HarFileObj
+
+import xarray as xr
+import logging
+import mrio_toolbox.extractors.gtap11.gtap_mrio.mrio_builder as mrio_builder
+
+log = logging.getLogger(__name__)
+
+def extract_gtap11(
+        year = 2017,
+        source = "",
+        files = "all",
+        name = "gtap",
+        build_io = True,
+        save_raw = True
+        ):
+    """
+    Extract GTAP data from .har files and save it as .nc file.
+
+    Parameters
+    ----------
+    source : str, optional
+        Location of the source files, by default the current directory
+    save_raw : str, optional
+        Where to save the raw files, by default the current directory
+    files : list, optional
+        List of files to extract, by default, all files in the source directory
+    name : str, optional
+        Name under which the files are saved, by default "gtap"
+    build_io : bool, optional
+        Whether to build the input-output table, by default False
+
+    Raises
+    ------
+    NotADirectoryError
+        Exception raised when the source directory does not exist
+    FileNotFoundError
+        Exception raised when the destination directory does not contain any .har files
+        If only some files are missing, a warning is issued but the extraction continues
+    """
+    #Check source path
+    if not os.path.exists(source):
+        log.error(f"{os.path.abspath(source)} does not exist.")
+        raise NotADirectoryError(f"{os.path.abspath(source)} does not exist.")
+
+    log.info(f"Extracting GTAP data from {os.path.abspath(source)}")
+
+    #List available har files
+    har_files = [f for f in os.listdir(source) if f.endswith(".har")]
+    if len(har_files) == 0:
+        log.error(f"No .har files found in {os.path.abspath(source)}")
+        raise FileNotFoundError(f"No .har files found in {os.path.abspath(source)}")
+    log.info(f"Found {len(har_files)} .har files in {os.path.abspath(source)}")
+
+    if isinstance(files, str) and files == "all":
+        files = har_files
+
+    ds = xr.Dataset()
+    for file in files:
+        if file not in har_files:
+            log.warning(f"{file} not found in {os.path.abspath(source)}")
+            continue
+        log.info(f" Extracting {file}")
+        filename = os.path.join(source, file)
+        data = HarFileObj(filename = filename)
+        variables = data.getHeaderArrayNames()
+        for variable in variables:
+            log.info(f" Extracting {variable}")
+            ds = convert_variable(data, variable, ds)
+
+    if save_raw:
+        log.info(f"Saving {name}_raw.nc")
+        if save_raw is True:
+            #By default, save in the source folder
+            save_raw = source
+        ds.to_netcdf(os.path.join(save_raw, f"{name}_raw.nc"))
+    if build_io:
+        log.info("Building input-output table")
+        mrio = mrio_builder.build_io(ds)
+        mrio.name = f"{name}_{year}"
+        return mrio
+
+def convert_variable(file, variable, ds):
+    """
+    Convert a variable from a .har file to a xarray DataArray.
+
+    Data descriptor variables are stored as attributes of the dataset.
+
+    Parameters
+    ----------
+    file : harpy.HarFileObj
+        Representation of the .har file
+    variable : str
+        Name of the variable to extract
+    ds : xarray.Dataset
+        Dataset to which the variable is added
+
+    Returns
+    -------
+    ds : xarray.Dataset
+        Updated dataset
+    """
+    data = file[variable]
+    coords = dict()
+    dims = []
+    for dim in data.sets.dims:
+        if dim.name is None:
+            #Intercepts descriptive variables
+            log.info(f" {variable} is a descriptive variable")
+            ds.attrs[variable] = str(data.array)
+            return ds
+        dims.append(dim.name)
+        coords[dim.name] = dim.dim_desc
+    ds[variable] = xr.DataArray(
+        data.array,
+        coords = coords,
+        dims = dims,
+        attrs = {
+            "long_name": data.long_name,
+            "name" : variable
+            }
+        )
+    return ds
+
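A minimal usage sketch of the extractor above, assuming a local directory of GTAP 11 .har files (the directory name is a placeholder; the import path follows the file listing at the top of this diff):

    from mrio_toolbox.extractors.gtap11.extraction.extractor import extract_gtap11

    # Hypothetical call: "gtap11_data" must be a folder containing the .har source files.
    mrio = extract_gtap11(
        year=2017,
        source="gtap11_data",
        files="all",       # or an explicit list such as ["basedata.har"]
        name="gtap",
        build_io=True,     # also build the input-output table via mrio_builder.build_io
        save_raw=True,     # writes gtap_raw.nc into the source folder by default
    )
    print(mrio.name)       # "gtap_2017" (a value is returned only when build_io is True)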
mrio_toolbox/extractors/gtap11/extraction/harpy_files/_header_sets.py (new file)
@@ -0,0 +1,279 @@
+"""
+Created on Jun 29 14:46:48 2018
+
+"""
+
+import numpy as np
+from typing import List, Union
+
+class _HeaderSet:
+    """
+    This class is used to represent sets associated with header arrays.
+    """
+
+    # Status is unknown elements but set, element index, known set elements, no set just numeric
+    _valid_status = ["u", "e", "k", "n"]
+    _genSetID = 0
+
+    def __init__(self, name: 'Union[str,None]',
+                 status: str,
+                 dim_desc: 'Union[List[str],str,None]',
+                 dim_size: int):
+
+        self.name = name
+        self.status = status
+        self.dim_desc = dim_desc
+        if not dim_desc is None:
+            if any([len(el) > 12 for el in dim_desc]):
+                raise ValueError("Set Element too long (maximum 12 Characters for set Elements)")
+        self.elemPosDict={} if self.dim_desc is None else dict(zip( [elem.strip().lower() for elem in dim_desc], range(0,len(self.dim_desc))))
+        self.dim_size = dim_size
+
+    def transform_index(self,index):
+        if isinstance(index,(str,int)):
+            return self.name_to_ind(index), None
+
+        elif isinstance(index,slice):
+            newslice=self.convertSlice(index)
+            npIndList=list(range(self.dim_size))[newslice]
+            SetName=self._newname() if not all(p is None for p in [newslice.start,newslice.stop,newslice.step]) else self.name
+            if self.dim_desc:
+                return npIndList, _HeaderSet(SetName, self.status, self.dim_desc[newslice], len(npIndList))
+            else:
+                return npIndList, _HeaderSet(SetName, self.status, dim_desc=None, dim_size=len(npIndList))
+
+
+        elif isinstance(index,list):
+            useElem=self.status in ["e","k"]
+            setElList=[] if useElem else None
+            npIndList=[]
+            for ind in index:
+                if isinstance(ind, (str,int) ):
+                    idx=self.name_to_ind(ind)
+                    npIndList.append(idx)
+                    if useElem: setElList.append(self.dim_desc[idx])
+                elif isinstance(ind,slice):
+                    newslice = self.convertSlice(ind)
+                    npIndList.append(list(range(self.dim_size))[newslice])
+                    if useElem: setElList.extend(self.dim_desc[newslice])
+                else:
+                    raise TypeError("Only slice, str, int allowed in list indexing")
+            if useElem:
+                if len(set(setElList)) != len(setElList):
+                    raise ValueError("Indexing leads to duplicate set elements which is not permitted")
+                if setElList != self.dim_desc:
+                    return npIndList, _HeaderSet(self._newname(), self.status, setElList, len(npIndList))
+                else:
+                    return npIndList, self
+            else:
+                return npIndList, _HeaderSet(self._newname(), self.status, None, len(npIndList))
+
+
+    def convertSlice(self,index):
+        if not isinstance(index.step, int) and not index.step is None:
+            raise ValueError("step in slice has to be integer")
+        start=self.name_to_ind(index.start)
+        start= None if start==0 else start
+        stop = self.name_to_ind(index.stop)
+        stop = None if stop==self.dim_size else stop
+        step= None if index.step == 1 else index.step
+        return slice(start, stop, step)
+
+    def name_to_ind(self,idx):
+        if idx is None:
+            return None
+        elif isinstance(idx,str):
+            if idx.strip().lower() in self.elemPosDict:
+                return self.elemPosDict[idx.strip().lower()]
+            else:
+                raise ValueError("Element not in set")
+        elif isinstance(idx,int):
+            if idx >= self.dim_size:
+                raise ValueError("Index Out Of bounds")
+            return idx
+
+    def _newname(self):
+        _HeaderSet._genSetID+=1
+        return "S@"+str(_HeaderSet._genSetID)
+
+
+class _HeaderDims:
+
+    def __init__(self, setList):
+        self._dims=setList
+
+    @staticmethod
+    def fromShape(shape):
+        setList=[_HeaderSet(None, 'n', None, dim) for dim in shape]
+        return _HeaderDims(setList)
+
+    @staticmethod
+    def fromSetShape(sets, setElDict, shape):
+        setObjList=[]
+        lowerDict=dict(zip([key.strip().lower() for key in setElDict.keys()], setElDict.keys() ))
+        for idim, setName in enumerate(sets):
+            lowSet=setName.strip().lower()
+            if lowSet in lowerDict:
+                setObjList.append(_HeaderSet(setName,'k',setElDict[lowerDict[lowSet]],shape[idim]))
+            else:
+                setObjList.append(_HeaderSet(setName, 'u', None, shape[idim]))
+        return _HeaderDims(setObjList)
+
+
+    @property
+    def dims(self) -> List[_HeaderSet]:
+        return self._dims
+
+    @dims.setter
+    def dims(self, obj) -> None:
+        self._dims = obj
+
+    def ndim(self):
+        """
+        Number of dimensions
+        """
+        return len(self._dims)
+
+    def defined(self):
+        """
+        Tells whether dimensensions have sets defined or are just array like
+        """
+        return not all([dim.name is None for dim in self._dims])
+
+    @property
+    def setNames(self):
+        return [dim.name for dim in self.dims]
+
+    @setNames.setter
+    def setNames(self, sNames):
+        if not isinstance(sNames,list): raise TypeError("set Names needs to be given as a list of strings")
+        if len(sNames) != len(self.dims) : raise ValueError("wrong length of set List. Header is rank "+str(len(self.dims))+ "but received list size "+ len(sNames))
+        for name in sNames:
+            if not isinstance(name,str): raise TypeError("set Names contains a non string object: "+str(name))
+            if len(name.strip()) > 12 : raise ValueError("Set names are limited to 12 characters. received '"+name+"'")
+        for newName, dim in zip(sNames,self.dims):
+            dim.name=newName.strip()
+
+    @property
+    def setElements(self):
+        return [dim.dim_desc for dim in self.dims]
+
+    @property
+    def shape(self):
+        return tuple([sets.dim_size for sets in self._dims])
+
+    def __str__(self):
+        outputstr=""
+        for setDim in self._dims:
+            if setDim.status in "keu":
+                outputstr+=" " + setDim.name.ljust(12) + ": \n"
+            else:
+                outputstr+=" "+"Not Specified"
+            if setDim.status in "ke":
+                outputstr+=" " +", ".join(setDim.dim_desc) + "\n"
+        return outputstr
+
+
+
+    def compatible_shape(self,other):
+        return self.shape == other
+
+    def matchSets(self,sets=None, shape:tuple=None):
+        if sets is None and shape is None : raise KeyError("Only one argument allowed")
+        newSets = []
+        if not sets is None:
+            # Try to match the shape of the dimensions
+            iset=len(self.dims)-1; jset=len(sets.dims)-1
+            while iset >=0 and jset >=0:
+                if jset < 0 :
+                    newSets.append(self.dims[iset])
+                    iset -=1
+                elif iset < 0 :
+                    newSets.append(sets.dims[jset])
+                    jset -=1
+                if self.dims[iset].dim_size == sets.dims[jset].dim_size or self.dims[iset].dim_size == 1 or sets.dims[jset].dim_size == 1:
+                    if self.dims[iset].status != 'n':
+                        newSets.append(self.dims[iset])
+                    else:
+                        newSets.append(sets.dims[jset])
+                iset-= 1 ; jset -=1
+            newSets.reverse()
+        elif not shape is None:
+            iset = len(self.dims) - 1; jset=len(shape)-1
+            while iset >=0 and jset >=0:
+                if jset < 0 :
+                    newSets.append(self.dims[iset])
+                    iset -=1
+                elif iset < 0 :
+                    newSets.append(_HeaderSet(None , 'n' , None, shape[jset]))
+                    jset -=1
+                if self.dims[iset].dim_size == shape[jset] or self.dims[iset].dim_size == 1 or shape[jset] == 1:
+                    newSets.append(self.dims[iset])
+                iset-= 1 ; jset -=1
+            newSets.reverse()
+        else:
+            return KeyError("Either sets o shape have to be defined")
+
+        return _HeaderDims(newSets)
+
+
+    def transform_index(self,index_tuple):
+        if not isinstance(index_tuple,tuple):
+            index_tuple=(index_tuple,)
+
+        trueLen=len([x for x in index_tuple if x is not None])
+        if trueLen != self.ndim() and not Ellipsis in index_tuple:
+            raise ValueError("Rank mismatch in indexing")
+        if index_tuple.count(Ellipsis)>1:
+            raise ValueError("Only single Ellipsis (...) allowed in indexing")
+
+        thisIndex=[]
+        for ind in index_tuple:
+            if ind == Ellipsis:
+                for i in range(0,self.ndim()-trueLen+1):
+                    thisIndex.append(slice(None,None,None))
+            elif isinstance(ind,(list,str,int,slice)) or ind is None:
+                thisIndex.append(ind)
+            else:
+                raise TypeError("Only ...,list,str,int,slice and None allowed as indices")
+
+        npIndex=[]
+        newSets=[]
+
+        iset=0
+        for index in thisIndex:
+            if index is None:
+                npInd=np.newaxis
+                newSet=_HeaderSet(None , 'n' , None, 1)
+            else:
+                setDim=self._dims[iset]
+                npInd, newSet = setDim.transform_index(index)
+                iset+=1
+            npIndex.append(npInd)
+            newSets.append(newSet)
+
+        rankIndex=tuple([slice(None) if isinstance(ind,list) or ind is None else 0 for ind in npIndex])
+        newSets = [setDim for ri, setDim in zip(rankIndex,newSets) if ri != 0]
+        return self._makeNPIndex(npIndex), rankIndex, _HeaderDims(newSets)
+
+
+    @staticmethod
+    def _makeNPIndex(indexList):
+        newinds = []
+        for i, item in enumerate(indexList):
+            if isinstance(item, list):
+                newinds.append(item)
+            elif isinstance(item,int):
+                newinds.append([item])
+
+        numpyInd = list(np.ix_(*newinds))
+        newinds=[]
+        for item in indexList:
+            if not item is None:
+                newinds.append(numpyInd.pop(0))
+            else:
+                newinds.append(None)
+
+        return tuple(newinds)
+
+
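The two private classes above implement the label-based indexing used when header arrays are sliced. A small sketch of their behaviour, assuming direct use of the private module (the set names and elements below are made up for illustration and are not part of any public API):

    from mrio_toolbox.extractors.gtap11.extraction.harpy_files._header_sets import (
        _HeaderSet,
        _HeaderDims,
    )

    # Two named sets with known elements (status "k"), as fromSetShape would create them.
    regions = _HeaderSet("REG", "k", ["usa", "eur", "row"], 3)
    sectors = _HeaderSet("COMM", "k", ["agr", "mnf"], 2)
    dims = _HeaderDims([regions, sectors])

    print(dims.shape)     # (3, 2)
    print(dims.setNames)  # ['REG', 'COMM']

    # "usa" resolves to position 0; the full slice keeps both sectors.
    np_index, rank_index, new_dims = dims.transform_index(("usa", slice(None)))
    print(new_dims.shape) # (2,) -- the region dimension is dropped from the result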
mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file.py (new file)
@@ -0,0 +1,262 @@
+"""
+.. autoclass: HarFileObj
+    :members:
+
+Created on Mar 12 09:53:27 2018
+"""
+
+from .har_file_io import HarFileIO, HarFileInfoObj
+from .header_array import HeaderArrayObj
+from collections import OrderedDict
+from typing import TypeVar, List, Union
+from os import path
+import warnings
+TypeHarFileObj = TypeVar('TypeHarFileObj', bound='HarFileObj')
+
+class HarFileObj(object):
+    """
+    HAR file object - essentially a memory representation of a HAR file.
+
+    ``HarFileObj`` stores a `list` of ``harpy.HeaderArrayObj`` in ``self``.
+    Each ``harpy.HeaderArrayObj`` corresponds to a header-array.
+    If ``HarFileObj`` is provided with ``filename``, then the header-arrays in that file will be loaded - i.e., each ``harpy.HeaderArrayObj`` in ``self`` will correspond to a header-array in that file.
+
+    Access to the ``HarFileObj`` is provided in a dict-like style: `__getitem__`, `__setitem__`, `__delitem__`, and `__contains__` are implemented. Each can take list arguments and returns results as a list.
+    Note that all methods are case insensitive with respect to header names.
+
+    Example: given file ex.har with Headers HD1 and HD2::
+
+        >>> from harpy import HarFileObj
+        >>> thisHar = HarFileObj("ex.har")
+        >>> headersOnFile = thisHar.getHeaderArrayNames() # ["HD1","HD2"]
+        >>> hd1Head = thisHar["HD1"] # obtain the HeaderArrayObj for HD1
+        >>> del thisHar["HD1"] # remove HD1 from HarFile
+        >>> print("HD1" in thisHar)
+        False
+        >>> thisHar.writeToDisk() # overwrites ex.har, now only contains HD2
+
+    Attributes
+    ----------
+    head_arrs : OrderedDict
+        Returned/provided as a `list` of ``HeaderArrayObj`` defining all ``HeaderArrayObj`` associated with a file.
+
+    _hfi : HarFileInfoObj
+        Basic info of the HAR file content. This is used in conjunction with `head_arrs` to permit on-the-fly reading of ``HeaderArrayObj`` and thus reduce the memory footprint.
+    """
+
+
+    def __init__(self, filename: str=None):
+        self._head_arrs = OrderedDict()
+        self.filename=filename
+        if isinstance(filename, str):
+            if path.isfile(filename):
+                self._hfi = HarFileIO.readHarFileInfo(filename)
+            else:
+                self._hfi = HarFileInfoObj(file=filename)
+
+    def __getitem__(self, item : 'Union[str, List[str]]' ):
+        if isinstance(item,str):
+            return self._getHeaderArrayObj(item)
+        elif isinstance(item,list):
+            if not all([isinstance(myitem,str) for myitem in item]):
+                raise TypeError("All items in item must be of type 'str'")
+            return self._getHeaderArrayObjs(item)
+        else:
+            raise TypeError("item must be string or list of strings")
+
+
+    def __setitem__(self, key: 'Union[str, List[str]]', value: 'Union[HeaderArrayObj, List[HeaderArrayObj]]'):
+        if isinstance(key, str) and isinstance(value,HeaderArrayObj):
+            self._addHeaderArrayObj(key, value)
+        elif isinstance(key, list) and isinstance(value,list):
+            if not all([isinstance(mykey,str) for mykey in key]):
+                raise TypeError("All items in key must be of type 'str'")
+            if not all([isinstance(myval,HeaderArrayObj) for myval in value]):
+                raise TypeError("All items in value must be of type 'HeaderArrayObj'")
+            self._addHeaderArrayObjs(key, value)
+        else:
+            raise TypeError("Only combination str-HeaderArrayObj or list(str)-list(HeaderArrayObj) permitted in __getitem__'")

+        return None
+
+    def __delitem__(self, key):
+        if isinstance(key,str):
+            if key.strip().upper() in self._head_arrs:
+                del self._head_arrs[key.strip().upper()]
+        elif isinstance(key,list):
+            for mykey in key:
+                if mykey in self._head_arrs:
+                    del self[mykey]
+        else:
+            raise TypeError("key must be string or list of strings")
+
+        return None
+
+    def __contains__(self, key):
+        if isinstance(key,str):
+            return key.strip().upper() in self._head_arrs
+        return False
+
+
+
+
+    def getHeaderArrayNames(self):
+        """
+        :return: Returns the name of all ``harpy.HeaderArrayObj()`` stored with ``self``.
+        """
+
+        if not self._hfi.is_valid(fatal=False):
+            warnings.warn("Har file "+self._hfi.filename+" has changed since last access, rereading information")
+            self._hfi=HarFileObj(self._hfi.file)._hfi
+            self._head_arrs=OrderedDict()
+
+        return self._hfi.getHeaderArrayNames()
+
+    def getRealHeaderArrayNames(self):
+        """
+        :return: Returns only the names of arrays of type 2D or 7D - i.e. multi-dimensional header arrays of floating point numbers.
+        """
+
+        if not self._hfi.is_valid():
+            warnings.warn("Har file "+self._hfi.filename+" has changed since last access, rereading information")
+            self._hfi = HarFileObj(self._hfi.file)._hfi
+            self._head_arrs = OrderedDict()
+        return [key for key,val in self._hfi.items() if val.data_type in ["RE","RL","2R"]]
+
+
+    def _getHeaderArrayObj(self, ha_name: str):
+        """
+        Retrieve a single ``harpy.HeaderArrayObj``.
+
+        :param ha_name: The ``"name"`` of the ``harpy.HeaderArrayObj``.
+        :return: A ``harpy.HeaderArrayObj``.
+        """
+
+        if not self._hfi.is_valid(fatal=False):
+            warnings.warn("Har file "+self._hfi.filename+" has changed since last access, rereading information")
+            self._hfi = HarFileObj(self._hfi.file)._hfi
+            self._head_arrs = OrderedDict()
+
+        if not isinstance(ha_name, str):
+            raise TypeError("'ha_name' must be a string.")
+
+        upname=ha_name.strip().upper()
+        if not upname in self._hfi:
+            raise KeyError("HeaderArrayObj '%s' does not exist in HarFileObj." % ha_name)
+        if not upname in self._head_arrs:
+            hnames, haos= HarFileIO.readHeaderArraysFromFile(self._hfi, ha_names=upname)
+            self._head_arrs[upname]=haos[0]
+
+        return self._head_arrs[upname]
+
+    def _getHeaderArrayObjs(self, ha_names=None):
+        """
+        Retrieve a `list` of `harpy.HeaderArrayObj`.
+
+        :param 'Union[None,str,List[str]]' ha_names: The name or `list` of names of ``harpy.HeaderArrayObj``. If `None` is provided (the default) then all ``harpy.HeaderArrayObj`` are returned.
+        :return: `list` of ``harpy.HeaderArrayObj``.
+        """
+
+        if isinstance(ha_names, str):
+            ha_names = [ha_names]
+        elif ha_names is None:
+            ha_names = self.getHeaderArrayNames()
+
+        ha_objs = []
+        for ha_name in ha_names:
+            ha_objs.append(self._getHeaderArrayObj(ha_name))
+        return ha_objs
+
+    def _readHeaderArrayObjs(self, ha_names = None):
+        """
+        Reads the header array objects with names ``ha_names`` from ``filename``. If `None` (the default), read all header array objects. `harpy.HeaderArrayObj` are stored in ``self`` and can be retrieved with the ``self.getHeaderArrayObjs()`` method.
+
+        :param 'Union[None,str,List[str]]' ha_names:
+        """
+        hnames, haos = HarFileIO.readHeaderArraysFromFile(self._hfi, ha_names=ha_names)
+        self._head_arrs=OrderedDict(zip(hnames, haos))
+
+
+    def writeToDisk(self, filename: str=None, ha_names=None):
+        """
+        :param str filename: Writes `harpy.HeaderArrayObj` with ``ha_names`` to ``filename``. If ``ha_names`` is None, write all the `harpy.HeaderArrayObj` stored in ``self``.
+        :param 'Union[None,str,List[str]]' ha_names: The names of the header arrays to write to ``filename``.
+        """
+        if filename is None and self.filename is None:
+            raise ValueError("No filename specified in write or upon creation, use writeToDisk(filename=YOURFILENAME)")
+        if filename is None:
+            filename=self.filename
+        if ha_names is None:
+            ha_names = self.getHeaderArrayNames()
+        elif isinstance(ha_names, str):
+            ha_names = [ha_names]
+
+        ha_to_write = self._getHeaderArrayObjs(ha_names)
+
+        HarFileIO.writeHeaders(filename, ha_names, ha_to_write)
+        self._hfi.updateMtime()
+
+    def _removeHeaderArrayObjs(self, ha_names):
+        """
+        TODO: its more of a pop, not a remove, maybe rename
+        :param 'Union[str,List[str]]' ha_names: Remove one or more `harpy.HeaderArrayObj` from ``self``.
+        """
+
+        if isinstance(ha_names, str):
+            ha_names = [ha_names]
+
+        outlist=self._getHeaderArrayObjs(ha_names)
+
+        for ha_name in ha_names:
+            if ha_name.strip().upper() in self._hfi:
+                del self._hfi._ha_infos[ha_name.strip().upper()]
+            if ha_name.strip().upper() in self._head_arrs:
+                del self._head_arrs[ha_name.strip().upper()]
+        return outlist
+
+    def _addHeaderArrayObjs(self, hnames, ha_objs) -> None:
+        """
+        :param 'Union[HeaderArrayObj,List[HeaderArrayObj]]' ha_objs: Add one or more `harpy.HeaderArrayObj` to ``self``.
+        """
+
+        if isinstance(ha_objs, HeaderArrayObj):
+            ha_objs = [ha_objs]
+        if isinstance(hnames, str):
+            hnames = [hnames]
+
+        for hname, ha_obj in zip(hnames,ha_objs):
+            if ha_obj.is_valid():
+                self._addHeaderArrayObj(hname, ha_obj)
+
+        return None
+
+    def _addHeaderArrayObj(self, hname : str, ha_obj: HeaderArrayObj):
+        """
+        :param ha_obj: A `harpy.HeaderArrayObj` object.
+        """
+
+        if len(hname.strip()) > 4:
+            raise HarFileObj.InvalidHeaderArrayName("Name of Header too long")
+
+        self._hfi.addHAInfo(hname.strip().upper(),0,0)
+        self._head_arrs[hname.strip().upper()]= ha_obj
+
+
+    @staticmethod
+    def _loadFromDisk(filename: str, ha_names: list = None) -> TypeHarFileObj:
+        """Loads a HAR file into memory, returning a HarFileObj.
+
+        :param filename: The name of the file to load.
+        :param ha_names: If provided, only reads headers with the names matching the strings contained in this list. By default, all header arrays are read.
+        :return "HarFileObj": Returns ``HarFileObj`` with
+        """
+
+        hfo = HarFileObj(filename=filename)
+        hfo._readHeaderArrayObjs(ha_names=ha_names)
+
+        return hfo
+
+
+    class InvalidHeaderArrayName(ValueError):
+        """Raised if header array name is not exactly four (alphanumeric) characters long."""
+        pass