mrio-toolbox 1.1.2__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mrio-toolbox might be problematic. Click here for more details.

Files changed (61) hide show
  1. {mrio_toolbox-1.1.2.dist-info → mrio_toolbox-1.1.3.dist-info}/METADATA +1 -1
  2. mrio_toolbox-1.1.3.dist-info/RECORD +5 -0
  3. mrio_toolbox-1.1.3.dist-info/top_level.txt +1 -0
  4. __init__.py +0 -21
  5. _parts/_Axe.py +0 -539
  6. _parts/_Part.py +0 -1739
  7. _parts/__init__.py +0 -7
  8. _parts/part_operations.py +0 -57
  9. extractors/__init__.py +0 -20
  10. extractors/downloaders.py +0 -36
  11. extractors/emerging/__init__.py +0 -3
  12. extractors/emerging/emerging_extractor.py +0 -117
  13. extractors/eora/__init__.py +0 -3
  14. extractors/eora/eora_extractor.py +0 -132
  15. extractors/exiobase/__init__.py +0 -3
  16. extractors/exiobase/exiobase_extractor.py +0 -270
  17. extractors/extractors.py +0 -81
  18. extractors/figaro/__init__.py +0 -3
  19. extractors/figaro/figaro_downloader.py +0 -280
  20. extractors/figaro/figaro_extractor.py +0 -187
  21. extractors/gloria/__init__.py +0 -3
  22. extractors/gloria/gloria_extractor.py +0 -202
  23. extractors/gtap11/__init__.py +0 -7
  24. extractors/gtap11/extraction/__init__.py +0 -3
  25. extractors/gtap11/extraction/extractor.py +0 -129
  26. extractors/gtap11/extraction/harpy_files/__init__.py +0 -6
  27. extractors/gtap11/extraction/harpy_files/_header_sets.py +0 -279
  28. extractors/gtap11/extraction/harpy_files/har_file.py +0 -262
  29. extractors/gtap11/extraction/harpy_files/har_file_io.py +0 -974
  30. extractors/gtap11/extraction/harpy_files/header_array.py +0 -300
  31. extractors/gtap11/extraction/harpy_files/sl4.py +0 -229
  32. extractors/gtap11/gtap_mrio/__init__.py +0 -6
  33. extractors/gtap11/gtap_mrio/mrio_builder.py +0 -158
  34. extractors/icio/__init__.py +0 -3
  35. extractors/icio/icio_extractor.py +0 -121
  36. extractors/wiod/__init__.py +0 -3
  37. extractors/wiod/wiod_extractor.py +0 -143
  38. mrio.py +0 -899
  39. mrio_toolbox-1.1.2.dist-info/RECORD +0 -59
  40. mrio_toolbox-1.1.2.dist-info/top_level.txt +0 -6
  41. msm/__init__.py +0 -6
  42. msm/multi_scale_mapping.py +0 -863
  43. utils/__init__.py +0 -3
  44. utils/converters/__init__.py +0 -5
  45. utils/converters/pandas.py +0 -244
  46. utils/converters/xarray.py +0 -132
  47. utils/formatting/__init__.py +0 -0
  48. utils/formatting/formatter.py +0 -527
  49. utils/loaders/__init__.py +0 -7
  50. utils/loaders/_loader.py +0 -312
  51. utils/loaders/_loader_factory.py +0 -96
  52. utils/loaders/_nc_loader.py +0 -184
  53. utils/loaders/_np_loader.py +0 -112
  54. utils/loaders/_pandas_loader.py +0 -128
  55. utils/loaders/_parameter_loader.py +0 -386
  56. utils/savers/__init__.py +0 -11
  57. utils/savers/_path_checker.py +0 -37
  58. utils/savers/_to_folder.py +0 -165
  59. utils/savers/_to_nc.py +0 -60
  60. {mrio_toolbox-1.1.2.dist-info → mrio_toolbox-1.1.3.dist-info}/WHEEL +0 -0
  61. {mrio_toolbox-1.1.2.dist-info → mrio_toolbox-1.1.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,974 +0,0 @@
1
- """
2
- Created on Mar 02 10:23:40 2018
3
-
4
- """
5
- from collections import OrderedDict
6
- import io
7
- import struct
8
- import sys
9
- import os
10
- import math
11
- from typing import List,Union,BinaryIO
12
- import numpy as np
13
-
14
- from .header_array import HeaderArrayObj
15
- from ._header_sets import _HeaderSet, _HeaderDims
16
-
17
# Byte/text conversion helpers shared by the HAR reader and writer.
# This module uses function annotations, so it can only be imported on
# Python 3; the former Python 2 branch of this shim could never run.
import codecs

is_unicode = True


def tb(x):
    """Convert text to bytes ("to bytes") using Latin-1.

    Falsy values (``''``, ``b''``, ``None``) are returned unchanged. Values
    that cannot be Latin-1 encoded (already ``bytes``, or text containing
    characters outside Latin-1) are also returned unchanged.
    """
    if not x:
        return x
    try:
        return codecs.latin_1_encode(x)[0]
    except Exception:
        # Deliberate best effort: hand the value back untouched. Catching
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        return x


def fb(x):
    """Convert bytes to text ("from bytes") by decoding as UTF-8.

    Falsy values are returned unchanged.
    """
    if not x:
        return x
    return x.decode('utf-8')
39
-
40
class HarFileInfoObj(object):
    """Index of the header arrays found in one HAR file.

    Stores the file's absolute path, the modification time last seen for it,
    and a mapping from upper-cased header name to its ``_HAInfo`` record.
    """

    def __init__(self, file: str=None, ha_infos: 'List[dict]'=None):
        """
        :param str file: Path to the file; stored as an absolute path.
        :param ha_infos: Existing mapping of header name to header info.
            A new empty ``OrderedDict`` is created when omitted.
        """
        self.filename = os.path.abspath(file)
        # Always define ``_mtime`` so ``is_valid`` works even when the file
        # did not exist at construction time.
        if os.path.isfile(self.filename):
            self._mtime = os.path.getmtime(self.filename)
        else:
            self._mtime = None
        # Keep a caller-supplied mapping instead of leaving the attribute unset.
        self._ha_infos = OrderedDict() if ha_infos is None else ha_infos

    def updateMtime(self):
        """Record the file's current modification time."""
        self._mtime = os.path.getmtime(self.filename)

    def addHAInfo(self, name, pos_name, pos_data):
        """Register a header under its stripped, upper-cased name.

        :param name: Header name as read from the file.
        :param pos_name: File offset of the header's name record.
        :param pos_data: File offset just after the name record.
        """
        name = name.strip().upper()
        self._ha_infos[name] = HarFileInfoObj._HAInfo(name, pos_name, pos_data, parent_har_file=self)

    def getHeaderArrayNames(self):
        """Return the registered header names as a list."""
        return list(self._ha_infos.keys())

    def items(self):
        """Return the ``(name, info)`` pairs of the registered headers."""
        return self._ha_infos.items()

    def __contains__(self, item):
        return item in self._ha_infos

    @property
    def file(self):
        """Path of the HAR file."""
        return self.filename

    @file.setter
    def file(self, obj):
        # Explicit check instead of ``assert`` so validation survives ``python -O``.
        if not isinstance(obj, str):
            msg = "'obj' is not a subclass of 'str' ('obj' is of type %s)." % type(obj)
            raise TypeError(msg)
        self.filename = obj

    @property
    def ha_infos(self):
        """Mapping of header name to header info."""
        return self._ha_infos

    @ha_infos.setter
    def ha_infos(self, obj):
        self._ha_infos = obj

    def getHeaderArrayInfo(self, ha_name: str):
        """Return the info record for ``ha_name`` (case-insensitive, stripped).

        :raises ValueError: if no such header is registered.
        """
        key = ha_name.strip().upper()
        if key not in self._ha_infos:
            raise ValueError("'%s' does not exist in har file '%s'." % (ha_name, self.filename))
        return self._ha_infos[key]

    def is_valid(self, fatal=True):
        """Check whether the file is unchanged since its mtime was recorded.

        The stored modification time is refreshed as a side effect.

        :param fatal: When the file does not exist, raise if true; otherwise
            return ``True``.
        :return: ``True`` if the stored and current modification times match.
        :raises FileNotFoundError: if the file is missing and ``fatal`` is true.
        """
        if not os.path.isfile(self.filename):
            if fatal:
                raise FileNotFoundError("HAR file "+self.filename+" does not exist")
            return True
        current = os.path.getmtime(self.filename)
        valid = self._mtime == current
        self._mtime = current
        return valid

    class _HAInfo(object):
        """HAInfo is for Header-Array specific information. Any header array written to disk must exist in a HarFile, hence the nesting of ``_HAInfo`` within ``HarFileInfoObj``."""

        def __init__(self, name, pos_name, pos_data, parent_har_file=None):
            # TODO: Perform checks on name, pos_name, pos_data
            self.name = name
            self.pos_name = pos_name
            self.pos_data = pos_data
            self.parent_har_file = parent_har_file
            # The fields below are placeholders until the header is read.
            self.version = 0
            self.data_type = None
            self.storage_type = None
            self.long_name = None
            self.file_dims = None
            self.sets = None
            self.coeff_name = None
124
-
125
-
126
- class HarFileIO(object):
127
-
128
- V1SupDataTypes = ['1C', '2R', '2I', 'RE', 'RL', 'DE', 'DL'] # Supported data types for Version 1
129
- SupStorageTypes = ['FULL', 'SPSE'] # Supported storage types
130
- MaxDimVersion = [0, 7, 0, 14] # Maximum dimensions for each version???
131
-
132
- def __init__(self):
133
- """
134
- :rtype: HarFileIO
135
- """
136
-
137
- self._HeaderPos = OrderedDict()
138
-
139
@staticmethod
def readHarFileInfo(filename: str) -> HarFileInfoObj:
    """
    Scan a HAR file and index every header it contains.

    :param filename: Filename.
    :return: A ``HarFileInfoObj`` whose entries carry each header's
        positions, version, data type, storage type, long name and
        file dimensions.
    """
    file_info = HarFileInfoObj(file=filename)

    with open(filename, "rb") as stream:
        stream.seek(0)
        # Keep locating header names until none is returned.
        while True:
            name_pos, header_name, data_pos = HarFileIO._readHeaderPosName(stream)
            if not header_name:
                break
            file_info.addHAInfo(header_name, name_pos, data_pos)
            entry = file_info.ha_infos[header_name.strip().upper()]
            (entry.version,
             entry.data_type,
             entry.storage_type,
             entry.long_name,
             entry.file_dims) = HarFileIO._getHeaderInfo(stream, header_name)
    return file_info
159
-
160
@staticmethod
def _readHeaderPosName(fp: BinaryIO):
    """
    Find the next record whose first four bytes are not blank and treat
    it as a header name.

    :param file fp: a file pointer
    :return: ``(position of the name record, decoded name, file position
        after the record)``. The name is falsy when no name was found.
    """
    offset = fp.tell()
    raw_name = ''
    while True:
        record_start = offset
        record_len = HarFileIO._getEntrySize(fp)
        if not record_len:
            # End of file (or a zero-length record): nothing more to read.
            break
        raw_name = fp.read(4).strip()
        if raw_name:
            raw_name += fp.read(record_len - 4)
            HarFileIO._checkRead(fp, record_len)
            break
        # Blank prefix: not a name record. Jump to its trailing length
        # marker (leading int + payload) and verify it.
        raw_name = None
        offset = offset + 8 + record_len
        fp.seek(offset - 4)
        HarFileIO._checkRead(fp, record_len)
    return record_start, fb(raw_name), fp.tell()
185
-
186
- @staticmethod
187
- def readHeaderArraysFromFile(hfi: HarFileInfoObj, ha_names: 'Union[None, str, List[str]]' = None, readData=False):
188
-
189
- if ha_names is None:
190
- ha_names = hfi.getHeaderArrayNames()
191
- elif isinstance(ha_names, str):
192
- ha_names = [ha_names]
193
-
194
- haos = []
195
-
196
- for ha_name in ha_names or readData:
197
- haos.append(HarFileIO.readHeader(hfi=hfi, header_name=ha_name))
198
-
199
- return ha_names, haos
200
-
201
-
202
@staticmethod
def readHeader(hfi: 'HarFileInfoObj', header_name: str):
    """
    Read one header array from the file described by ``hfi``.

    :param hfi: File info object indexing the file's headers.
    :param header_name: The name of the header to read.
    :return: Header object with data.
    :raises ValueError: if the header's data type is not handled here.
    :raises RuntimeError: if the header version is not 1.
    """
    hfi.is_valid()
    info = hfi.getHeaderArrayInfo(header_name)

    with open(hfi.filename, "rb") as stream:

        try:
            stream.seek(info.pos_name)
        except KeyError:
            raise KeyError("Header '%s' does not exist in file." % header_name)

        stream.seek(info.pos_data)

        header_fields = HarFileIO._getHeaderInfo(stream, header_name)
        (info.version, info.data_type, info.storage_type, info.long_name, info.file_dims) = header_fields

        if info.version != 1:
            raise RuntimeError("Unsupported/unrecognised HAR header version.")

        info.header_type = "data"
        if info.data_type == '1C':
            info.array = HarFileIO._read1CArray(stream, file_dims=info.file_dims)
            if info.long_name.startswith("Set "):
                # Long names of the form "Set NAME ..." describe a set; its
                # elements are the character data just read.
                dims = [_HeaderSet(name=info.long_name.split()[1], status='k',
                                   dim_desc=info.array.tolist(), dim_size=info.file_dims[0])]
            else:
                dims = [_HeaderSet(name=None, status='n', dim_desc=None, dim_size=info.file_dims[0])]
            info.sets = _HeaderDims(dims)
        elif info.data_type == 'RE':
            info.has_elements = True
            info.array = HarFileIO._readREArray(stream, info, file_dims=info.file_dims)
        elif info.data_type == 'RL':
            info.has_elements = False
            info.array = HarFileIO._readREArray(stream, info, file_dims=info.file_dims, hasSets=False)
        elif info.data_type in ['2R', '2I']:
            # '2R' holds floats, '2I' holds integers.
            element_code = 'f' if info.data_type in ['2R'] else 'i'

            dims = [_HeaderSet(name=None, status='n', dim_desc=None, dim_size=info.file_dims[axis])
                    for axis in (0, 1)]
            info.sets = _HeaderDims(dims)
            info.array = HarFileIO._read2DArray(stream, data_type=element_code,
                                                file_dims=info.file_dims,
                                                storage_type=info.storage_type)
        else:
            raise ValueError("Data type '%s' is unsupported." % info.data_type)

    return HeaderArrayObj.HeaderArrayFromCompiledData(coeff_name=info.coeff_name,
                                                      long_name=info.long_name,
                                                      array=info.array,
                                                      SetDims=info.sets)
263
-
264
@staticmethod
def _read1CArray(fp, file_dims=None, use_unicode: bool = is_unicode, ):
    """
    Read a 1C (character) header as a contiguous array of strings.

    :param fp: Binary file object positioned at the character records.
    :param file_dims: ``file_dims[0]`` is the number of strings and
        ``file_dims[1]`` the width of each one.
    :param use_unicode: Passed through to ``_readCharVec``.
    """
    count = file_dims[0]
    width = file_dims[1]
    strings = HarFileIO._readCharVec(fp, itemsize=width, dtype="<U12",
                                     size=(count,), use_unicode=use_unicode)
    return np.ascontiguousarray(strings)
273
-
274
@staticmethod
def _read2DArray(fp, data_type: str = "i", storage_type: str = None, file_dims: tuple = None):
    """
    Read a 2-D integer or float header (types 2I / 2R).

    :param fp: Binary file object positioned at the first data record.
    :param data_type: ``'i'`` for 32-bit integers or ``'f'`` for 32-bit floats.
    :param storage_type: Must not be ``'SPSE'``.
    :param file_dims: Dimensions from the header; the first two give the shape.
    :return: C-contiguous array of shape ``file_dims[0:2]``.
    :raises TypeError: for sparse storage.
    :raises ValueError: for an unknown ``data_type`` or mismatching sizes.
    :raises RuntimeError: if a record is malformed.
    """
    if storage_type == 'SPSE':
        raise TypeError('Sparse storage not allowed for 2D data form.')

    if data_type in ["i", "I"]:
        data_type = np.int32
        data_type_str = "i"
    elif data_type in ["f", "F"]:
        data_type = np.float32
        data_type_str = "f"
    else:
        raise ValueError(
            "Provided argument 'data_type' must be 'i' (integer) or 'f' (float). '%s' was provided." % data_type)

    # Note that 'order' refers to Fortran vs C (i.e. has nothing to do with floats or ints)
    array = np.ndarray(shape=file_dims[0:2], dtype=data_type, order='F')

    arraySize = array.size
    nread = 0
    while nread != arraySize:
        nbyte = HarFileIO._getEntrySize(fp)
        V = HarFileIO._unpack_data(fp, "=4siiiiiii")

        # The leading '4s' field of a data record is four blanks; a
        # one-character literal could never equal it.
        if fb(V[0]) != '    ':
            raise RuntimeError("Encountered characters at read2D loop")
        if V[2] != array.shape[0]:
            raise ValueError("Mismatching row sizes on header")
        if V[3] != array.shape[1]:
            raise ValueError("Mismatching col sizes on header")

        # V[4]..V[7] are the 1-based, inclusive row/column bounds of the
        # block stored in this record.
        xsize = V[5] - V[4] + 1
        ysize = V[7] - V[6] + 1
        ndata = xsize * ysize
        nread += ndata
        dat = HarFileIO._unpack_data(fp, "=" + str(ndata) + data_type_str)
        array[V[4] - 1:V[5], V[6] - 1:V[7]] = np.array(dat).reshape(xsize, ysize, order='F')

        if nbyte != HarFileIO._getEntrySize(fp):
            raise RuntimeError('Header corrupted.')

    return np.ascontiguousarray(array)
320
-
321
@staticmethod
def _readREArray(fp: BinaryIO, header_info: HarFileInfoObj._HAInfo, file_dims: tuple = None, hasSets=True):
    """
    Read the data of a real-valued header into a float32 array.

    :param fp: Binary file object positioned after the second record.
    :param header_info: Header info; its ``sets`` (and, with sets,
        ``coeff_name``) attributes are filled in here.
    :param file_dims: Dimensions read from the header.
    :param hasSets: Whether a set/element record precedes the data.
    :return: C-contiguous float32 array.
    """
    if hasSets:
        coeff_and_sets = HarFileIO._readSets(fp, file_dims=file_dims)
        (header_info.coeff_name, header_info.sets) = coeff_and_sets
        rank = header_info.sets.ndim()
    else:
        # No set information stored: describe all seven dimensions anonymously.
        rank = 7
        anonymous = [_HeaderSet(name=None, status='n', dim_desc=None, dim_size=file_dims[axis])
                     for axis in range(rank)]
        header_info.sets = _HeaderDims(anonymous)

    values = np.zeros(shape=file_dims[0:rank], dtype=np.float32, order='F')

    if header_info.storage_type == 'FULL':
        values = HarFileIO._readREFullObj(fp, values, 'f')
    else:
        values = HarFileIO._readRESparseObj(fp, values, 'f')
    # The reshape back to the original shape is needed for rank 0 objects.
    final_shape = values.shape
    return np.ascontiguousarray(values).reshape(final_shape)
349
-
350
-
351
@staticmethod
def _getHeaderInfo(fp: BinaryIO, name):
    """
    Read a header's second record (type, storage, long name, dimensions).

    :param fp: Binary file object positioned at the second record.
    :param name: Header name, used only in error messages.
    :return: ``(version, data_type, storage_type, long_name, sizes)``.
    :raises RuntimeError: if the record does not start with four blanks
        or the data type is not supported.
    :raises ValueError: for an unknown storage type or an inconsistent
        record length.
    :raises TypeError: if the rank exceeds the supported maximum.
    """
    nbyte = HarFileIO._getEntrySize(fp)

    Record = HarFileIO._unpack_data(fp, "=4s2s4s70si")

    # The leading '4s' field must be four blanks.
    if fb(Record[0]) != '    ':
        raise RuntimeError("Encountered characters at first four positions of 2nd Record.")

    DataType = fb(Record[1])
    if DataType in HarFileIO.V1SupDataTypes:
        Version = 1
    else:
        raise RuntimeError('Header "' + name + '" is HAR Version ' + DataType +
                           " format which cannot be read.\nPlease check for updates of The HARpy module")

    StorageType = fb(Record[2])
    if StorageType not in HarFileIO.SupStorageTypes:
        raise ValueError('Unknown StorageType "' + StorageType + '" in Header "' + name + '"')

    LongName = fb(Record[3])

    rank = Record[4]
    if rank > HarFileIO.MaxDimVersion[Version]:
        raise TypeError("Array Rank " + str(rank) + ' in header "' + name +
                        '" exceeds maximum permitted dimension for HAR Version ' + str(Version))
    # 84 bytes of fixed fields plus one 4-byte int per dimension.
    if 84 + 4 * rank != nbyte:
        raise ValueError('Header "' + name + '" is corrupted at dimensions in second Record')

    Sizes = HarFileIO._unpack_data(fp, "=" + ("i" * rank))

    HarFileIO._checkRead(fp, nbyte)
    return Version, DataType, StorageType, LongName, Sizes
385
-
386
@staticmethod
def _readCharVec(fp: BinaryIO, itemsize:int=None, dtype=None, size:tuple=None, use_unicode=is_unicode):
    """
    Read a vector of fixed-width strings spread over one or more records.

    :param fp: Binary file object positioned at the first character record.
    :param itemsize: Width of each string in characters.
    :param dtype: Dtype hint; if its text contains ``"<U"`` the array's
        item size is divided by four to obtain the on-disk width.
    :param size: Shape tuple; ``size[0]`` is the number of strings.
    :param use_unicode: Whether the result array holds unicode strings.
    :return: Contiguous array of the strings read.
    :raises IOError: if a record does not start with four blanks or its
        length markers disagree.
    :raises ValueError: if a record declares a different total size.
    :raises RuntimeError: if the records hold more entries than declared.
    """
    array = np.chararray(size, itemsize=itemsize, unicode=use_unicode)
    Clen = array.itemsize

    if "<U" in str(dtype):
        Clen = Clen // 4

    MaxEntry = size[0]
    NRec = 100  # any value > 1; replaced by the count stored in each record
    ndata = 0

    while NRec > 1:
        nbyte = HarFileIO._getEntrySize(fp)

        V = HarFileIO._unpack_data(fp, '=4siii')

        # The leading '4s' field of a data record is four blanks.
        if fb(V[0]) != '    ':
            raise IOError("Encountered characters at first four positions")

        NRec = V[1]
        if V[2] != MaxEntry:
            raise ValueError("Different Size than specified")

        if ndata + V[3] > MaxEntry:
            raise RuntimeError("More data on Header than declared")

        AllStr = fb(fp.read(V[3] * Clen))

        if nbyte != HarFileIO._getEntrySize(fp):
            raise IOError("I/O Error: sizes on 1C header do not match record length")

        # Cut the block of characters into Clen-wide strings.
        for j, i in enumerate(range(0, V[3] * Clen, Clen)):
            array[ndata + j] = AllStr[i:i + Clen]
        ndata += V[3]

    return np.ascontiguousarray(array)
434
-
435
@staticmethod
def _readREFullObj(fp, array, dtype):
    """
    Fill ``array`` from the records of a fully stored real header.

    :param fp: Binary file object positioned at the first data record.
    :param array: Pre-allocated array whose shape fixes the result shape.
    :param dtype: ``struct`` format character of one value (e.g. ``'f'``).
    :return: Array of the original shape holding the values read.
    :raises RuntimeError: if a record is malformed.
    """
    nbyte = HarFileIO._getEntrySize(fp)
    V = HarFileIO._unpack_data(fp, '=4sii')

    # Every record's leading '4s' field must be four blanks.
    if fb(V[0]) != '    ':
        raise Exception("Encountered characters at read7D[1]")

    nrec = V[1]
    NDim = V[2]
    # The dimension sizes are consumed to advance the file; not used here.
    HarFileIO._unpack_data(fp, "=" + ('i' * NDim))

    if nbyte != HarFileIO._getEntrySize(fp):
        raise RuntimeError("Header corrupted read7D[0] @ %d" % fp.tell())

    oldshape = array.shape
    array = array.flatten('F')
    idata = 0
    while nrec > 1:
        # First record of a pair: read and validated, contents not used here.
        nbyte = HarFileIO._getEntrySize(fp)
        V = HarFileIO._unpack_data(fp, '=4s15i')

        if fb(V[0]) != '    ':
            raise RuntimeError("Encountered characters at first four positions at)SetEl")

        if nbyte != HarFileIO._getEntrySize(fp):
            raise RuntimeError('read7D data[2] corrupted')

        # Second record of a pair: the values. Its length minus the eight
        # bytes of fixed fields gives the number of values.
        nbyte = HarFileIO._getEntrySize(fp)
        ndata = (nbyte - 8) // struct.calcsize(dtype)
        V = HarFileIO._unpack_data(fp, '=4si' + str(ndata) + dtype)

        if nbyte != HarFileIO._getEntrySize(fp):
            raise RuntimeError('read7D data[2])corrupted')

        if fb(V[0]) != '    ':
            raise RuntimeError("Encountered characters at read7D[2]")

        nrec = V[1]
        array[idata:idata + ndata] = V[2:]
        idata += ndata
    array = array.reshape(oldshape, order='F')
    return array
485
-
486
@staticmethod
def _readRESparseObj(fp:BinaryIO, array: np.ndarray, dtype):
    """
    Fill ``array`` from the records of a sparsely stored real header.

    :param fp: Binary file object positioned at the first sparse record.
    :param array: Pre-allocated array; entries not listed in the file
        keep their current values.
    :param dtype: ``struct`` format character of one value (e.g. ``'f'``).
    :return: Array of the original shape with the listed entries set.
    :raises ValueError: if a record is malformed or uses unsupported sizes.
    """
    nbyte = HarFileIO._getEntrySize(fp)
    nrec = 50  # any value > 1; replaced by the count stored in each record
    V = HarFileIO._unpack_data(fp, '=4siii80s')
    if V[2] != 4:
        raise ValueError("Can only read integer 4 in read7DSparse7D ")
    if V[3] != 4:
        raise ValueError("Can only read real 4 in read7DSparse7D ")
    if nbyte != HarFileIO._getEntrySize(fp):
        raise ValueError('Header corrupted read7DSparse[0]')

    oldshape = array.shape
    array = array.flatten('F')

    while nrec > 1:
        nbyte = HarFileIO._getEntrySize(fp)
        V = HarFileIO._unpack_data(fp, '=4siii')

        # The leading '4s' field of a data record is four blanks.
        if fb(V[0]) != '    ':
            raise ValueError("Encountered characters at read7DSparse loop")

        nrec = V[1]
        NHere = V[3]
        # NHere 1-based positions followed by NHere values.
        V = HarFileIO._unpack_data(fp, '=' + str(NHere) + 'i' + str(NHere) + dtype)

        if nbyte != HarFileIO._getEntrySize(fp):
            raise ValueError('Header corrupted read7DSparse)[1]')

        # One indexed assignment instead of a Python loop per element.
        positions = np.asarray(V[:NHere], dtype=np.intp) - 1
        array[positions] = V[NHere:]

    array = array.reshape(oldshape, order='F')
    return array
524
-
525
@staticmethod
def _readSetElementInfoRecord(fp):
    """
    Read the record that names a header's coefficient and sets.

    :param fp: Binary file object positioned at the set/element record.
    :return: ``(coefficient, set_names, set_status, element_list)``.
        The three lists are empty when the record carries no set data.
    :raises RuntimeError: if the record does not start with four blanks.
    """
    SetNames = []
    ElementList = []
    SetStatus = []

    nbyte = HarFileIO._getEntrySize(fp)

    # Read in chunks: later field widths depend on counts read earlier.
    V = HarFileIO._unpack_data(fp, '=4siii12si')

    # The leading '4s' field must be four blanks.
    if fb(V[0]) != '    ':
        raise RuntimeError("Encountered characters at first four positions at SetEl")

    NSets = V[3]
    Coefficient = fb(V[4])
    setKnown = V[5] != 0
    if setKnown:
        # NSets 12-char names, NSets 1-char status flags, NSets ints, one int.
        dataForm = "=" + str(NSets * 12) + 's' + str(NSets) + 's' + str(NSets) + 'i' + 'i'
    else:
        dataForm = '=i'

    V = HarFileIO._unpack_data(fp, dataForm)
    if setKnown:
        SetNames = [fb(V[0][i:i + 12]) for i in range(0, NSets * 12, 12)]
        SetStatus = [fb(V[1][i:i + 1]) for i in range(0, NSets)]

    Nexplicit = V[-1]
    dataForm = '=' + str(Nexplicit * 12) + 's'
    V = HarFileIO._unpack_data(fp, dataForm)

    if Nexplicit > 0:
        # NOTE(review): this reads a second block of Nexplicit*12 bytes and
        # slices it by NSets rather than Nexplicit. Behaviour is kept as
        # found; confirm against the HAR format before changing it.
        V = HarFileIO._unpack_data(fp, dataForm)

        ElementList = [fb(V[-1][i:i + 12]) for i in range(0, NSets * 12, 12)]

    HarFileIO._checkRead(fp, nbyte)

    return Coefficient, SetNames, SetStatus, ElementList
568
-
569
@staticmethod
def _readSets(fp: BinaryIO, file_dims=None) -> (str,_HeaderDims):
    """
    Read the set description of a header and build its dimensions.

    :param fp: BinaryIO object.
    :param file_dims: Dimension sizes of the header.
    :return tuple: (coefficient_name, header_sets)
    """
    coeff, raw_names, statuses, explicit_elements = HarFileIO._readSetElementInfoRecord(fp)

    dims = []
    elements_by_set = OrderedDict()
    for axis, (set_name, status) in enumerate(zip((raw.strip() for raw in raw_names), statuses)):
        if status == 'k':
            # Known set: its elements are stored once, the first time the
            # set name appears, and reused for repeated dimensions.
            if set_name not in elements_by_set:
                elements_by_set[set_name] = HarFileIO._readCharVec(
                    fp, itemsize=12, use_unicode=is_unicode,
                    size=tuple([file_dims[axis]]), dtype="<U12")
            description = [element.strip() for element in elements_by_set[set_name]]
        elif status == 'u':
            description = None
        elif status == 'e':
            description = explicit_elements.pop(0)
        else:
            # Any other status contributes no dimension.
            continue
        dims.append(_HeaderSet(name=set_name, status=status,
                               dim_desc=description, dim_size=file_dims[axis]))

    return coeff, _HeaderDims(dims)
594
-
595
- @staticmethod
596
- def _unpack_data(fp, form, data=''):
597
- """
598
- :param fp:
599
- :param form:
600
- :param data:
601
- :return:
602
- """
603
- # print("HarFileIO._unpack_data() form, data ", form, data)
604
- if not data:
605
- data = fp.read(struct.calcsize(form))
606
- return struct.unpack(form, data[0:struct.calcsize(form)])
607
-
608
- @staticmethod
609
- def _getEntrySize(fp: BinaryIO) -> Union[int,None]:
610
- """
611
- Reads 4 bytes (corresponds to the size of the entry).
612
- :param fp: Read directly from ``fp``.
613
- """
614
- data = fp.read(4)
615
- if not data:
616
- return None
617
- tmp = struct.unpack("=i", data)[0]
618
- return tmp
619
-
620
- @staticmethod
621
- def _checkRead(fp, nbyte):
622
- if HarFileIO._getEntrySize(fp) != nbyte: # Must be int at end that says how long the entry was as well...
623
- import traceback
624
- traceback.print_stack()
625
- raise IOError('File Corrupted, start int does not match end int.')
626
-
627
@staticmethod
def writeHeaders(filename: 'Union[str, io.BufferedWriter]', hnames : 'List[str]',
                 head_arr_objs: 'Union[HeaderArrayObj, List[HeaderArrayObj]]'):
    """
    Write header arrays to a new HAR file, replacing any existing file.

    :param filename: Path of the file to write. Only ``str`` is accepted.
    :param hnames: Header name, or list of names, paired in order with
        ``head_arr_objs``.
    :param head_arr_objs: One ``HeaderArrayObj`` or a list of them.
    :raises TypeError: if an object is not a ``HeaderArrayObj``, if
        ``filename`` is not a string, or if an array's dtype matches no
        known header type.
    """
    if isinstance(head_arr_objs, HeaderArrayObj):
        head_arr_objs = [head_arr_objs]
    # A bare string would otherwise be zipped character by character,
    # pairing the object with only the first letter of the name.
    if isinstance(hnames, str):
        hnames = [hnames]

    for head_arr_obj in head_arr_objs:
        if not isinstance(head_arr_obj, HeaderArrayObj):
            raise TypeError("All 'head_arr_objs' must be of HeaderArrayObj type.")
        head_arr_obj.is_valid()

    if not isinstance(filename, str):
        raise TypeError("'filename' is invalid - must be either file object or string.")

    with open(filename, "wb") as fp:
        for hname, head_arr_obj in zip(hnames, head_arr_objs):
            header_type_str = str(head_arr_obj.array.dtype)
            has_sets = head_arr_obj.sets.defined()

            if header_type_str in ['float32','float64'] and (head_arr_obj.array.ndim != 2 or has_sets):
                HarFileIO._writeHeader7D(fp, hname, head_arr_obj)
            elif header_type_str in ['int32','int64', 'float32','float64' ]:
                HarFileIO._writeHeader2D(fp, hname, head_arr_obj)
            elif '<U' in header_type_str or '|S' in header_type_str:
                if head_arr_obj.array.ndim > 1:
                    # NOTE(review): this stops writing ALL remaining headers,
                    # not just this one. Kept as found.
                    print('"' + hname + '" can not be written as character arrays ndim>1 are not yet supported')
                    return
                HarFileIO._writeHeader1C(fp, hname, head_arr_obj)
            else:
                raise TypeError('Can not write data in Header: "' +
                                hname + '" as data style '+header_type_str+' does not match any known Header type')
        fp.flush()
663
-
664
-
665
-
666
@staticmethod
def _writeHeader7D(fp: BinaryIO, hname : str, head_arr_obj: HeaderArrayObj):
    """
    Write a real-valued header (type RE), choosing full or sparse storage.

    Full storage is used when more than 40% of the entries are non-zero,
    sparse storage otherwise. The choice is stored on
    ``head_arr_obj.storage_type``.

    :param fp: Binary file object open for writing.
    :param hname: Header name (at most four characters).
    :param head_arr_obj: Header array to write.
    """
    array = head_arr_obj.array
    # An empty array has no fill ratio; treat it as all-zero (sparse)
    # rather than dividing by zero.
    if array.size:
        dataFill = float(np.count_nonzero(array)) / array.size
    else:
        dataFill = 0.0

    head_arr_obj.storage_type = 'FULL' if dataFill > 0.4 else 'SPSE'

    # The file always describes seven dimensions; missing ones have size 1.
    shape7D = [array.shape[i] if i < array.ndim else 1 for i in range(0, 7)]

    HarFileIO._writeHeaderName(fp, hname)

    # The first field is packed into a 4-byte slot and must be four blanks.
    secRecList = ['    ', 'RE', head_arr_obj.storage_type, head_arr_obj.long_name, 7]
    secRecList.extend(shape7D)
    HarFileIO._writeSecondRecord(fp, secRecList)
    HarFileIO._writeSetElInfo(fp, head_arr_obj)

    if head_arr_obj.storage_type == 'FULL':
        HarFileIO._write7DFullArray(fp, np.asfortranarray(array), 'f')
    else:
        HarFileIO._write7DSparseArray(fp, np.asfortranarray(array), 'f')
690
-
691
@staticmethod
def _writeHeader2D(fp: BinaryIO, hname : str, head_arr_obj: HeaderArrayObj):
    """
    Write a 2-D header of 32-bit integers (2I) or 32-bit floats (2R).

    :param fp: Binary file object open for writing.
    :param hname: Header name (at most four characters).
    :param head_arr_obj: Header array to write.
    :raises TypeError: if the array dtype is neither int32 nor float32.
    """
    array = head_arr_obj.array
    typeString = str(array.dtype)
    # Validate before writing so an unsupported dtype does not leave a
    # dangling header-name record in the file.
    if typeString == 'int32':
        dtype, headerType = 'i', '2I'
    elif typeString == 'float32':
        dtype, headerType = 'f', '2R'
    else:
        raise TypeError("Can only write 32bit float or int to 2D arrays")

    shape2D = [array.shape[i] if i < array.ndim else 1 for i in range(0, 2)]

    HarFileIO._writeHeaderName(fp, hname)
    # The first field is packed into a 4-byte slot and must be four blanks.
    secRecList = ['    ', headerType, 'FULL', head_arr_obj.long_name, 2]
    secRecList.extend(shape2D)

    HarFileIO._writeSecondRecord(fp, secRecList)
    HarFileIO._write2DArray(fp, np.asfortranarray(array), dtype)
708
-
709
@staticmethod
def _writeHeader1C(fp: BinaryIO, hname : str, head_arr_obj: HeaderArrayObj):
    """
    Write a character header (type 1C).

    :param fp: Binary file object open for writing.
    :param hname: Header name (at most four characters).
    :param head_arr_obj: Header array whose dtype text looks like
        ``<U12`` or ``|S12``; the digits give the string width.
    """
    array = head_arr_obj.array
    # Parse the width before writing anything so a failure here does not
    # leave a dangling header-name record in the file.
    no_chars = int(str(array.dtype)[2:])

    HarFileIO._writeHeaderName(fp, hname)
    # The first field is packed into a 4-byte slot and must be four blanks.
    secRecList = ['    ', '1C', 'FULL', head_arr_obj.long_name, 2, array.size, no_chars]
    HarFileIO._writeSecondRecord(fp, secRecList)
    HarFileIO._write1CArray(fp, np.asfortranarray(array), array.size, no_chars)
718
-
719
- @staticmethod
720
- def _writeHeaderName(fp: BinaryIO, name: str):
721
-
722
- if len(name) > 4:
723
- raise ValueError('Header name ' + name + ' is longer than 4 characters long. Header array not written to file.')
724
-
725
- name=name.ljust(4)
726
- dataForm = '=i4si'
727
- packed = struct.pack(dataForm, 4, tb(name), 4)
728
- fp.write(packed)
729
-
730
- @staticmethod
731
- def _writeSecondRecord(fp: BinaryIO, inList):
732
- nint = len(inList) - 4
733
-
734
- if len(inList[3]) != 70:
735
- raise ValueError("'long_name' must be precisely 70 characters long. 'long_name' is: %s (%d characters long)." % (inList[3], len(inList[3])))
736
-
737
- inList = [tb(x) if isinstance(x, str) else x for x in inList]
738
- dataForm = '=i4s2s4s70s' + 'i' * nint + 'i' # For reading it is "=4s2s4s70si"
739
- byteLen = struct.calcsize(dataForm) - 8
740
- inList.append(byteLen)
741
- inList.insert(0, byteLen)
742
- packed = struct.pack(dataForm, *inList)
743
- fp.write(packed)
744
-
745
@staticmethod
def _write7DFullArray(fp, array, dtype):
    """
    Write the data of a fully stored real header.

    Writes one record giving the seven dimension sizes, then, for each
    chunk produced by ``_slice_inds``, a record with the chunk's bounds
    and a record with its values.

    :param fp: Binary file object open for writing.
    :param array: Array to write; values are emitted in Fortran order.
    :param dtype: ``struct`` format character of one value (e.g. ``'f'``).
    """
    blank = b'    '  # every data record starts with four blanks
    ndim = array.ndim

    # Copy each pair of index sequences, as the generator may reuse them.
    chunks = [(st[:], end[:]) for st, end in HarFileIO._slice_inds(array, 7996)]
    # Two records per chunk plus the dimension record; counts down to 1.
    nrec = len(chunks) * 2 + 1

    dataForm = '=i4sii7ii'
    nbyte = struct.calcsize(dataForm) - 8
    shape7D = [array.shape[i] if i < ndim else 1 for i in range(0, 7)]
    fp.write(struct.pack(dataForm, nbyte, blank, nrec, 7, *shape7D, nbyte))

    flat = array.flatten('F')
    # Size of one value, so the record length is right for any format
    # character (4 for 'f' and 'i').
    itemSize = struct.calcsize('=' + dtype)
    nWritten = 0
    for st, end in chunks:
        nrec = nrec - 1
        # 1-based inclusive (start, end) per dimension; unused ones are (1, 1).
        PosList = []
        for i in range(0, 7):
            if i < ndim:
                PosList.extend((st[i] + 1, end[i]))
            else:
                PosList.extend((1, 1))
        dataForm = '=i4s16i'
        nbyte = struct.calcsize(dataForm) - 8
        fp.write(struct.pack(dataForm, nbyte, blank, nrec, *PosList, nbyte))

        nrec = nrec - 1
        ndata = 1
        for lo, hi in zip(st, end):
            ndata *= (hi - lo)
        nbyte = ndata * itemSize + 8

        fp.write(struct.pack('=i4si', nbyte, blank, nrec))
        fp.write(struct.pack('=' + str(ndata) + dtype, *flat[nWritten:nWritten + ndata]))
        nWritten += ndata
        fp.write(struct.pack('=i', nbyte))
787
-
788
- @staticmethod
789
- def _write7DSparseArray(fp, array, dtype):
790
- NNonZero = np.count_nonzero(array)
791
- Comment = 80 * ' '
792
- dataForm = '=i4siii80si'
793
- fp.write(struct.pack(dataForm, 96, tb(' '), NNonZero, 4, 4, tb(Comment), 96))
794
- maxData = 3996
795
- nrec = (NNonZero - 1) // maxData + 1
796
- ndata = 0
797
-
798
- if NNonZero == 0:
799
- fp.write(struct.pack('=i4siiii', 16, tb(' '), 1, 0, 0, 16))
800
- return
801
-
802
- indexList=maxData*[None]
803
- valList=maxData*[None]
804
- tmp=array.flatten('F')
805
- nzind=np.nonzero(tmp)
806
- for i in nzind[0]:
807
- ndata += 1
808
- indexList[ndata-1]=i+1
809
- valList[ndata-1]=tmp[i]
810
- if ndata == maxData:
811
- HarFileIO._writeSparseList(fp, NNonZero, dtype, indexList, ndata, nrec, valList)
812
- nrec = nrec - 1
813
- ndata = 0
814
-
815
- if ndata != 0:
816
- indexList=indexList[0:ndata]
817
- valList=valList[0:ndata]
818
- HarFileIO._writeSparseList(fp, NNonZero, dtype, indexList, ndata, nrec, valList)
819
-
820
@staticmethod
def _write2DArray(fp, array, dtype):
    """
    Write the data records of a 2I/2R header.

    The array is written in chunks from ``_slice_inds``; each record
    holds the full shape, the chunk's 1-based inclusive bounds and the
    chunk's values in Fortran order.

    :param fp: Binary file object open for writing.
    :param array: 1-D or 2-D array to write.
    :param dtype: ``struct`` format character of one 4-byte value.
    :raises ValueError: if ``array`` is neither 1-D nor 2-D.
    """
    if array.ndim not in (1, 2):
        # Other ranks previously fell through with a bogus index and length.
        raise ValueError("Can only write 1 or 2 dimensional arrays as 2D headers "
                         "(array has %d dimensions)." % array.ndim)

    blank = b'    '  # every data record starts with four blanks
    dataForm = "=i4siiiiiii"
    maxData = 7991
    # Record counter; counts down to 1 on the last record.
    nrec = (array.size - 1) // maxData + 1
    for st, end in HarFileIO._slice_inds(array, maxData):
        if array.ndim == 1:
            index = slice(st[0], end[0])
            ndata = end[0] - st[0]
            layout = (array.size, 1, st[0] + 1, end[0], 1, 1)
        else:
            index = (slice(st[0], end[0]), slice(st[1], end[1]))
            ndata = (end[0] - st[0]) * (end[1] - st[1])
            layout = (array.shape[0], array.shape[1], st[0] + 1, end[0], st[1] + 1, end[1])

        # 32 bytes of fixed fields plus four bytes per value.
        nbyte = ndata * 4 + 32
        fp.write(struct.pack(dataForm, nbyte, blank, nrec, *layout))
        fp.write(struct.pack('=' + str(ndata) + dtype, *array[index].flatten('F')))
        fp.write(struct.pack('=i', nbyte))
        nrec = nrec - 1
845
-
846
@staticmethod
def _write1CArray(fp: BinaryIO, array: np.ndarray, vecDim: int, strLen: int):
    """Write a vector of fixed-width strings to ``fp``.

    Strings are left-justified to ``strLen`` characters and packed into
    records holding at most ``29996 // strLen`` strings each.  Every record
    has a length prefix, a 16-byte header (blank marker, record countdown,
    total vector length, strings on this record), the string data, and the
    length again as suffix.  A zero-length vector produces one empty record.

    Args:
        fp: Binary file object opened for writing.
        array: Indexable collection of ``str``; entries ``0 .. vecDim - 1``
            are written.
        vecDim: Number of strings to write.
        strLen: Width in characters of each string slot.
    """
    # Build the 4-byte marker explicitly: a shorter literal would be
    # NUL-padded by struct's '4s' instead of blank-filled.
    blank = tb(' ' * 4)
    max_bytes = 29996
    per_record = max_bytes // strLen
    records_left = (vecDim - 1) // per_record + 1

    if records_left == 0:
        # Only reached for vecDim == 0: emit a single empty record.
        fp.write(struct.pack('=i4siiii', 16, blank, 1, 0, 0, 16))

    done = 0
    while records_left > 0:
        on_record = min(vecDim - done, per_record)
        nbyte = 16 + on_record * strLen
        fp.write(struct.pack('=i4siii', nbyte, blank, records_left, vecDim, on_record))
        text = tb(''.join(array[i].ljust(strLen) for i in range(done, done + on_record)))
        fp.write(struct.pack('=' + str(nbyte - 16) + 'si', text, nbyte))
        records_left -= 1
        done += on_record
867
-
868
@staticmethod
def _writeSparseList(fp, NNonZero, dtype, indexList, ndata, nrec, valList):
    """Write one record of sparse (position, value) pairs to ``fp``.

    Layout: length prefix, 16-byte header (blank marker, ``nrec``,
    ``NNonZero``, ``ndata``), ``ndata`` integer positions, ``ndata`` values
    in format ``dtype``, then the length again as suffix.

    Args:
        fp: Binary file object opened for writing.
        NNonZero: Total number of non-zero entries in the whole array.
        dtype: ``struct`` format character for the values.  The byte count
            assumes 4 bytes per value.
        indexList: Exactly ``ndata`` integer positions.
        ndata: Number of pairs on this record.
        nrec: Record countdown value stored in the header.
        valList: Exactly ``ndata`` values.
    """
    # Build the 4-byte marker explicitly: a shorter literal would be
    # NUL-padded by struct's '4s' instead of blank-filled.
    blank = tb(' ' * 4)
    # 16 header bytes plus 4 bytes each for a position and its value.
    nbyte = ndata * 2 * 4 + 16
    fp.write(struct.pack('=i4siii', nbyte, blank, nrec, NNonZero, ndata))
    fp.write(struct.pack('=' + str(ndata) + 'i', *indexList))
    fp.write(struct.pack('=' + str(ndata) + dtype, *valList))
    fp.write(struct.pack('=i', nbyte))
878
-
879
@staticmethod
def _writeSetElInfo(fp, header_arr_obj: HeaderArrayObj):
    """Write the set/element description record of a header to ``fp``.

    For every dimension in ``header_arr_obj.sets.dims`` the method reads
    ``name``, ``status`` and ``dim_desc``:

    * status ``'k'``: the dimension's element list is written afterwards
      through ``HarFileIO._write1CArray``, once per distinct set name;
    * status ``'e'``: the first entry of ``dim_desc`` is stored inline in
      the record;
    * anything else is recorded as ``'u'``.

    (Presumably known / single element / unknown -- confirm against the
    HAR format description.)

    If every set name is ``None`` the record is written with the
    "sets known" flag cleared and no set block.

    Args:
        fp: Binary file object opened for writing.
        header_arr_obj: Header whose ``sets.dims`` and ``coeff_name`` are
            serialised.
    """
    dims = header_arr_obj.sets.dims
    set_names = [dim.name for dim in dims]
    n_sets = len(set_names)

    seen_sets = set()
    known_element_lists = []
    inline_elements = []
    status_chars = []

    sets_known = 0 if all(name is None for name in set_names) else 1  # Fortran logical
    if sets_known:
        for dim in dims:
            if dim.status == 'k':
                if dim.name not in seen_sets:
                    seen_sets.add(dim.name)
                    known_element_lists.append(dim.dim_desc)
                status_chars.append('k')
            elif dim.status == 'e':
                inline_elements.append(dim.dim_desc[0])
                status_chars.append('e')
            else:
                status_chars.append('u')

    n_to_write = len(seen_sets)
    n_element = len(inline_elements)

    # Build the 4-byte marker explicitly: a shorter literal would be
    # NUL-padded by struct's '4s' instead of blank-filled.
    blank = tb(' ' * 4)

    fmt = '=i4siii12si'
    values = [None, blank, n_to_write, 1, n_sets,
              tb(header_arr_obj.coeff_name.ljust(12)), sets_known]

    if sets_known:
        # 12 characters per set name followed by one status character per
        # set, then one integer (always 0) per set.
        fmt += str(n_sets * 13) + 's' + str(n_sets) + 'i'
        names_block = ''.join(
            name.ljust(12) if name is not None else " " * 12 for name in set_names
        )
        values.append(tb(names_block + ''.join(status_chars)))
        values.extend([0] * n_sets)

    fmt += 'i'
    values.append(n_element)
    if n_element > 0:
        # The 's' code is required here: without it the count fuses with
        # the trailing 'i' (e.g. '24i') and struct.pack rejects the
        # arguments.  Each name is padded to its 12-byte slot, matching how
        # set names are written above.
        fmt += str(n_element * 12) + 's'
        values.append(tb(''.join(el.ljust(12) for el in inline_elements)))
    fmt += 'i'

    # Record length excludes the leading and trailing length fields.
    nbyte = struct.calcsize(fmt) - 8
    values[0] = nbyte
    values.append(nbyte)

    fp.write(struct.pack(fmt, *values))

    for elements in known_element_lists:
        HarFileIO._write1CArray(fp, np.array(elements), len(elements), 12)
939
-
940
- @staticmethod
941
- def _slice_inds(a, size):
942
- if a.ndim==0:
943
- yield [0],[1]
944
- return
945
- stride = [i // a.dtype.itemsize for i in a.strides]
946
- offset = [i * j for i, j in zip(stride, a.shape)]
947
- ndim = len(offset)
948
- for inc_dim, off in enumerate(offset):
949
- if off > size: break
950
- nslice = size // stride[inc_dim]
951
- increment = int(size // stride[inc_dim])
952
- tot_iter = int(math.ceil(float(a.shape[inc_dim]) / nslice) * offset[-1] / offset[inc_dim])
953
-
954
- end_index = [0 if i == inc_dim else 1 for i in range(0, ndim)]
955
- end_index[0:inc_dim] = a.shape[0:inc_dim]
956
-
957
- start_index = [0] * ndim
958
-
959
- for i in range(0, tot_iter):
960
- if end_index[inc_dim] == a.shape[inc_dim]:
961
- start_index[inc_dim] = 0
962
- end_index[inc_dim] = increment
963
- for j in range(inc_dim + 1, ndim):
964
- if end_index[j] == a.shape[j]:
965
- start_index[j] = 0
966
- end_index[j] = 1
967
- else:
968
- start_index[j] = end_index[j]
969
- end_index[j] += 1
970
- break
971
- else:
972
- start_index[inc_dim] = end_index[inc_dim]
973
- end_index[inc_dim] = min(a.shape[inc_dim], end_index[inc_dim] + increment)
974
- yield start_index, end_index