shepherd-data 2023.2.1__py3-none-any.whl → 2023.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shepherd_data/__init__.py +2 -1
- shepherd_data/calibration.py +17 -9
- shepherd_data/cli.py +24 -12
- shepherd_data/ivonne.py +35 -24
- shepherd_data/mppt.py +12 -7
- shepherd_data/reader.py +118 -77
- shepherd_data/writer.py +111 -95
- {shepherd_data-2023.2.1.dist-info → shepherd_data-2023.3.1.dist-info}/METADATA +40 -30
- shepherd_data-2023.3.1.dist-info/RECORD +14 -0
- {shepherd_data-2023.2.1.dist-info → shepherd_data-2023.3.1.dist-info}/WHEEL +1 -1
- shepherd_data-2023.2.1.dist-info/RECORD +0 -14
- {shepherd_data-2023.2.1.dist-info → shepherd_data-2023.3.1.dist-info}/LICENSE +0 -0
- {shepherd_data-2023.2.1.dist-info → shepherd_data-2023.3.1.dist-info}/entry_points.txt +0 -0
- {shepherd_data-2023.2.1.dist-info → shepherd_data-2023.3.1.dist-info}/top_level.txt +0 -0
- {shepherd_data-2023.2.1.dist-info → shepherd_data-2023.3.1.dist-info}/zip-safe +0 -0
shepherd_data/reader.py
CHANGED
|
@@ -10,7 +10,8 @@ from datetime import datetime
|
|
|
10
10
|
from itertools import product
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
from typing import Dict
|
|
13
|
-
from typing import
|
|
13
|
+
from typing import Generator
|
|
14
|
+
from typing import List
|
|
14
15
|
from typing import Optional
|
|
15
16
|
from typing import Union
|
|
16
17
|
|
|
@@ -36,45 +37,47 @@ class Reader:
|
|
|
36
37
|
"""
|
|
37
38
|
|
|
38
39
|
samples_per_buffer: int = 10_000
|
|
39
|
-
|
|
40
|
-
sample_interval_ns: int = int(10**9 // samplerate_sps)
|
|
41
|
-
sample_interval_s: float = 1 / samplerate_sps
|
|
42
|
-
|
|
43
|
-
max_elements: int = (
|
|
44
|
-
40 * samplerate_sps
|
|
45
|
-
) # per iteration (40s full res, < 200 MB RAM use)
|
|
40
|
+
samplerate_sps_default: int = 100_000
|
|
46
41
|
|
|
47
42
|
mode_dtype_dict = {
|
|
48
43
|
"harvester": ["ivsample", "ivcurve", "isc_voc"],
|
|
49
44
|
"emulator": ["ivsample"],
|
|
50
45
|
}
|
|
51
46
|
|
|
52
|
-
runtime_s: float = None
|
|
53
|
-
file_size: int = None
|
|
54
|
-
data_rate: float = None
|
|
55
|
-
|
|
56
|
-
_logger: logging.Logger = logging.getLogger("SHPData.Reader")
|
|
57
|
-
|
|
58
|
-
h5file: h5py.File = None
|
|
59
|
-
ds_time: h5py.Dataset = None
|
|
60
|
-
ds_voltage: h5py.Dataset = None
|
|
61
|
-
ds_current: h5py.Dataset = None
|
|
62
|
-
_cal: Dict[str, dict] = None
|
|
63
|
-
|
|
64
47
|
def __init__(self, file_path: Optional[Path], verbose: Optional[bool] = True):
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
48
|
+
if not hasattr(self, "_file_path"):
|
|
49
|
+
self._file_path: Optional[Path] = None
|
|
50
|
+
if isinstance(file_path, (Path, str)):
|
|
51
|
+
self._file_path = Path(file_path)
|
|
52
|
+
|
|
53
|
+
if not hasattr(self, "_logger"):
|
|
54
|
+
self._logger: logging.Logger = logging.getLogger("SHPData.Reader")
|
|
68
55
|
if verbose is not None:
|
|
69
56
|
self._logger.setLevel(logging.INFO if verbose else logging.WARNING)
|
|
70
57
|
|
|
71
|
-
|
|
72
|
-
|
|
58
|
+
self.samplerate_sps: int = 100_000
|
|
59
|
+
self.sample_interval_ns: int = int(10**9 // self.samplerate_sps)
|
|
60
|
+
self.sample_interval_s: float = 1 / self.samplerate_sps
|
|
61
|
+
|
|
62
|
+
self.max_elements: int = (
|
|
63
|
+
40 * self.samplerate_sps
|
|
64
|
+
) # per iteration (40s full res, < 200 MB RAM use)
|
|
65
|
+
|
|
66
|
+
# init stats
|
|
67
|
+
self.runtime_s: float = 0
|
|
68
|
+
self.file_size: int = 0
|
|
69
|
+
self.data_rate: float = 0
|
|
70
|
+
|
|
71
|
+
# open file (if not already done by writer)
|
|
72
|
+
if not hasattr(self, "h5file"):
|
|
73
|
+
if not isinstance(self._file_path, Path):
|
|
74
|
+
raise ValueError("Provide a valid Path-Object to Reader!")
|
|
73
75
|
if not self._file_path.exists():
|
|
74
76
|
raise FileNotFoundError(
|
|
75
77
|
errno.ENOENT, os.strerror(errno.ENOENT), self._file_path.name
|
|
76
78
|
)
|
|
77
|
-
|
|
79
|
+
|
|
80
|
+
self.h5file = h5py.File(self._file_path, "r") # = readonly
|
|
78
81
|
|
|
79
82
|
if self.is_valid():
|
|
80
83
|
self._logger.info("File is available now")
|
|
@@ -83,22 +86,29 @@ class Reader:
|
|
|
83
86
|
"File is faulty! Will try to open but there might be dragons"
|
|
84
87
|
)
|
|
85
88
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
self.
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
"
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
89
|
+
if not isinstance(self.h5file, h5py.File):
|
|
90
|
+
raise TypeError("Type of opened file is not h5py.File")
|
|
91
|
+
|
|
92
|
+
self.ds_time: h5py.Dataset = self.h5file["data"]["time"]
|
|
93
|
+
self.ds_voltage: h5py.Dataset = self.h5file["data"]["voltage"]
|
|
94
|
+
self.ds_current: h5py.Dataset = self.h5file["data"]["current"]
|
|
95
|
+
|
|
96
|
+
if not hasattr(self, "_cal"):
|
|
97
|
+
self._cal: Dict[str, Dict[str, float]] = {
|
|
98
|
+
"voltage": {
|
|
99
|
+
"gain": self.ds_voltage.attrs["gain"],
|
|
100
|
+
"offset": self.ds_voltage.attrs["offset"],
|
|
101
|
+
},
|
|
102
|
+
"current": {
|
|
103
|
+
"gain": self.ds_current.attrs["gain"],
|
|
104
|
+
"offset": self.ds_current.attrs["offset"],
|
|
105
|
+
},
|
|
106
|
+
}
|
|
107
|
+
|
|
99
108
|
self._refresh_file_stats()
|
|
100
109
|
|
|
101
|
-
if not
|
|
110
|
+
if file_path is not None:
|
|
111
|
+
# file opened by this reader
|
|
102
112
|
self._logger.info(
|
|
103
113
|
"Reading data from '%s'\n"
|
|
104
114
|
"\t- runtime %s s\n"
|
|
@@ -113,10 +123,12 @@ class Reader:
|
|
|
113
123
|
round(self.file_size / 2**20),
|
|
114
124
|
round(self.data_rate / 2**10),
|
|
115
125
|
)
|
|
126
|
+
|
|
127
|
+
def __enter__(self):
|
|
116
128
|
return self
|
|
117
129
|
|
|
118
|
-
def __exit__(self, *exc):
|
|
119
|
-
if
|
|
130
|
+
def __exit__(self, *exc): # type: ignore
|
|
131
|
+
if isinstance(self._file_path, Path):
|
|
120
132
|
self.h5file.close()
|
|
121
133
|
|
|
122
134
|
def __repr__(self):
|
|
@@ -124,7 +136,7 @@ class Reader:
|
|
|
124
136
|
self.get_metadata(minimal=True), default_flow_style=False, sort_keys=False
|
|
125
137
|
)
|
|
126
138
|
|
|
127
|
-
def _refresh_file_stats(self) ->
|
|
139
|
+
def _refresh_file_stats(self) -> None:
|
|
128
140
|
"""update internal states, helpful after resampling or other changes in data-group"""
|
|
129
141
|
self.h5file.flush()
|
|
130
142
|
if self.ds_time.shape[0] > 1:
|
|
@@ -132,12 +144,15 @@ class Reader:
|
|
|
132
144
|
self.samplerate_sps = max(int(10**9 // self.sample_interval_ns), 1)
|
|
133
145
|
self.sample_interval_s = 1.0 / self.samplerate_sps
|
|
134
146
|
self.runtime_s = round(self.ds_time.shape[0] / self.samplerate_sps, 1)
|
|
135
|
-
|
|
147
|
+
if isinstance(self._file_path, Path):
|
|
148
|
+
self.file_size = self._file_path.stat().st_size
|
|
149
|
+
else:
|
|
150
|
+
self.file_size = 0
|
|
136
151
|
self.data_rate = self.file_size / self.runtime_s if self.runtime_s > 0 else 0
|
|
137
152
|
|
|
138
153
|
def read_buffers(
|
|
139
|
-
self, start_n: int = 0, end_n: int = None, is_raw: bool = False
|
|
140
|
-
) -> tuple:
|
|
154
|
+
self, start_n: int = 0, end_n: Optional[int] = None, is_raw: bool = False
|
|
155
|
+
) -> Generator[tuple, None, None]:
|
|
141
156
|
"""Generator that reads the specified range of buffers from the hdf5 file.
|
|
142
157
|
can be configured on first call
|
|
143
158
|
|
|
@@ -183,7 +198,7 @@ class Reader:
|
|
|
183
198
|
:return:
|
|
184
199
|
"""
|
|
185
200
|
if "window_samples" in self.h5file["data"].attrs:
|
|
186
|
-
return self.h5file["data"].attrs["window_samples"]
|
|
201
|
+
return int(self.h5file["data"].attrs["window_samples"])
|
|
187
202
|
return 0
|
|
188
203
|
|
|
189
204
|
def get_mode(self) -> str:
|
|
@@ -215,7 +230,7 @@ class Reader:
|
|
|
215
230
|
"window_samples": self.get_window_samples(),
|
|
216
231
|
}
|
|
217
232
|
|
|
218
|
-
def data_timediffs(self) ->
|
|
233
|
+
def data_timediffs(self) -> List[float]:
|
|
219
234
|
"""calculate list of (unique) time-deltas between buffers [s]
|
|
220
235
|
-> optimized version that only looks at the start of each buffer
|
|
221
236
|
|
|
@@ -348,7 +363,9 @@ class Reader:
|
|
|
348
363
|
self._logger.warning("Hostname was not set (@Validator)")
|
|
349
364
|
return True
|
|
350
365
|
|
|
351
|
-
def get_metadata(
|
|
366
|
+
def get_metadata(
|
|
367
|
+
self, node: Union[h5py.Dataset, h5py.Group, None] = None, minimal: bool = False
|
|
368
|
+
) -> Dict[str, dict]:
|
|
352
369
|
"""recursive FN to capture the structure of the file
|
|
353
370
|
|
|
354
371
|
:param node: starting node, leave free to go through whole file
|
|
@@ -370,14 +387,18 @@ class Reader:
|
|
|
370
387
|
}
|
|
371
388
|
if node.name == "/data/time":
|
|
372
389
|
metadata["_dataset_info"]["time_diffs_s"] = self.data_timediffs()
|
|
390
|
+
# TODO: already convert to str to calm the typechecker?
|
|
391
|
+
# or construct a pydantic-class
|
|
373
392
|
elif "int" in str(node.dtype):
|
|
374
393
|
metadata["_dataset_info"]["statistics"] = self._dset_statistics(node)
|
|
394
|
+
# TODO: put this into metadata["_dataset_statistics"] ??
|
|
375
395
|
for attr in node.attrs.keys():
|
|
376
396
|
attr_value = node.attrs[attr]
|
|
377
397
|
if isinstance(attr_value, str):
|
|
378
398
|
with contextlib.suppress(yaml.YAMLError):
|
|
379
399
|
attr_value = yaml.safe_load(attr_value)
|
|
380
400
|
elif "int" in str(type(attr_value)):
|
|
401
|
+
# TODO: why not isinstance? can it be list[int] other complex type?
|
|
381
402
|
attr_value = int(attr_value)
|
|
382
403
|
else:
|
|
383
404
|
attr_value = float(attr_value)
|
|
@@ -396,22 +417,27 @@ class Reader:
|
|
|
396
417
|
|
|
397
418
|
return metadata
|
|
398
419
|
|
|
399
|
-
def save_metadata(self, node=None) -> dict:
|
|
420
|
+
def save_metadata(self, node: Union[h5py.Dataset, h5py.Group, None] = None) -> dict:
|
|
400
421
|
"""get structure of file and dump content to yaml-file with same name as original
|
|
401
422
|
|
|
402
423
|
:param node: starting node, leave free to go through whole file
|
|
403
424
|
:return: structure of that node with everything inside it
|
|
404
425
|
"""
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
426
|
+
if isinstance(self._file_path, Path):
|
|
427
|
+
yml_path = Path(self._file_path).absolute().with_suffix(".yml")
|
|
428
|
+
if yml_path.exists():
|
|
429
|
+
self._logger.info("%s already exists, will skip", yml_path)
|
|
430
|
+
return {}
|
|
431
|
+
metadata = self.get_metadata(
|
|
432
|
+
node
|
|
433
|
+
) # {"h5root": self.get_metadata(self.h5file)}
|
|
434
|
+
with open(yml_path, "w", encoding="utf-8-sig") as yfd:
|
|
435
|
+
yaml.safe_dump(metadata, yfd, default_flow_style=False, sort_keys=False)
|
|
436
|
+
else:
|
|
437
|
+
metadata = {}
|
|
412
438
|
return metadata
|
|
413
439
|
|
|
414
|
-
def __getitem__(self, key):
|
|
440
|
+
def __getitem__(self, key: str):
|
|
415
441
|
"""returns attribute or (if none found) a handle for a group or dataset (if found)
|
|
416
442
|
|
|
417
443
|
:param key: attribute, group, dataset
|
|
@@ -453,7 +479,9 @@ class Reader:
|
|
|
453
479
|
energy_ws = [_calc_energy(i) for i in job_iter]
|
|
454
480
|
return float(sum(energy_ws))
|
|
455
481
|
|
|
456
|
-
def _dset_statistics(
|
|
482
|
+
def _dset_statistics(
|
|
483
|
+
self, dset: h5py.Dataset, cal: Optional[dict] = None
|
|
484
|
+
) -> Dict[str, float]:
|
|
457
485
|
"""some basic stats for a provided dataset
|
|
458
486
|
:param dset: dataset to evaluate
|
|
459
487
|
:param cal: calibration (if wanted)
|
|
@@ -495,7 +523,8 @@ class Reader:
|
|
|
495
523
|
if len(stats_list) < 1:
|
|
496
524
|
return {}
|
|
497
525
|
stats_df = pd.DataFrame(stats_list)
|
|
498
|
-
stats
|
|
526
|
+
stats: Dict[str, float] = {
|
|
527
|
+
# TODO: wrong calculation for ndim-datasets with n>1
|
|
499
528
|
"mean": float(stats_df.loc[:, "mean"].mean()),
|
|
500
529
|
"min": float(stats_df.loc[:, "min"].min()),
|
|
501
530
|
"max": float(stats_df.loc[:, "max"].max()),
|
|
@@ -514,6 +543,8 @@ class Reader:
|
|
|
514
543
|
if h5_group["time"].shape[0] < 1:
|
|
515
544
|
self._logger.warning("%s is empty, no csv generated", h5_group.name)
|
|
516
545
|
return 0
|
|
546
|
+
if not isinstance(self._file_path, Path):
|
|
547
|
+
return 0
|
|
517
548
|
csv_path = self._file_path.with_suffix(f".{h5_group.name.strip('/')}.csv")
|
|
518
549
|
if csv_path.exists():
|
|
519
550
|
self._logger.warning("%s already exists, will skip", csv_path)
|
|
@@ -555,6 +586,8 @@ class Reader:
|
|
|
555
586
|
if h5_group["time"].shape[0] < 1:
|
|
556
587
|
self._logger.warning("%s is empty, no log generated", h5_group.name)
|
|
557
588
|
return 0
|
|
589
|
+
if not isinstance(self._file_path, Path):
|
|
590
|
+
return 0
|
|
558
591
|
log_path = self._file_path.with_suffix(f".{h5_group.name.strip('/')}.log")
|
|
559
592
|
if log_path.exists():
|
|
560
593
|
self._logger.warning("%s already exists, will skip", log_path)
|
|
@@ -585,7 +618,7 @@ class Reader:
|
|
|
585
618
|
data_src: h5py.Dataset,
|
|
586
619
|
data_dst: Union[None, h5py.Dataset, np.ndarray],
|
|
587
620
|
start_n: int = 0,
|
|
588
|
-
end_n: int = None,
|
|
621
|
+
end_n: Optional[int] = None,
|
|
589
622
|
ds_factor: float = 5,
|
|
590
623
|
is_time: bool = False,
|
|
591
624
|
) -> Union[h5py.Dataset, np.ndarray]:
|
|
@@ -603,12 +636,13 @@ class Reader:
|
|
|
603
636
|
self._logger.warning("Downsampling-Function was not written for IVCurves")
|
|
604
637
|
ds_factor = max(1, math.floor(ds_factor))
|
|
605
638
|
|
|
606
|
-
if end_n
|
|
607
|
-
|
|
639
|
+
if isinstance(end_n, (int, float)):
|
|
640
|
+
_end_n = min(data_src.shape[0], round(end_n))
|
|
608
641
|
else:
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
642
|
+
_end_n = data_src.shape[0]
|
|
643
|
+
|
|
644
|
+
start_n = min(_end_n, round(start_n))
|
|
645
|
+
data_len = _end_n - start_n # TODO: one-off to calculation below ?
|
|
612
646
|
if data_len == 0:
|
|
613
647
|
self._logger.warning("downsampling failed because of data_len = 0")
|
|
614
648
|
iblock_len = min(self.max_elements, data_len)
|
|
@@ -664,7 +698,7 @@ class Reader:
|
|
|
664
698
|
data_src: h5py.Dataset,
|
|
665
699
|
data_dst: Union[None, h5py.Dataset, np.ndarray],
|
|
666
700
|
start_n: int = 0,
|
|
667
|
-
end_n: int = None,
|
|
701
|
+
end_n: Optional[int] = None,
|
|
668
702
|
samplerate_dst: float = 1000,
|
|
669
703
|
is_time: bool = False,
|
|
670
704
|
) -> Union[h5py.Dataset, np.ndarray]:
|
|
@@ -683,12 +717,13 @@ class Reader:
|
|
|
683
717
|
if self.get_datatype() == "ivcurve":
|
|
684
718
|
self._logger.warning("Resampling-Function was not written for IVCurves")
|
|
685
719
|
|
|
686
|
-
if end_n
|
|
687
|
-
|
|
720
|
+
if isinstance(end_n, (int, float)):
|
|
721
|
+
_end_n = min(data_src.shape[0], round(end_n))
|
|
688
722
|
else:
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
723
|
+
_end_n = data_src.shape[0]
|
|
724
|
+
|
|
725
|
+
start_n = min(_end_n, round(start_n))
|
|
726
|
+
data_len = _end_n - start_n
|
|
692
727
|
if data_len == 0:
|
|
693
728
|
self._logger.warning("resampling failed because of data_len = 0")
|
|
694
729
|
fs_ratio = samplerate_dst / self.samplerate_sps
|
|
@@ -762,7 +797,10 @@ class Reader:
|
|
|
762
797
|
return data_dst
|
|
763
798
|
|
|
764
799
|
def generate_plot_data(
|
|
765
|
-
self,
|
|
800
|
+
self,
|
|
801
|
+
start_s: Optional[float] = None,
|
|
802
|
+
end_s: Optional[float] = None,
|
|
803
|
+
relative_ts: bool = True,
|
|
766
804
|
) -> Dict:
|
|
767
805
|
"""provides down-sampled iv-data that can be feed into plot_to_file()
|
|
768
806
|
|
|
@@ -838,11 +876,11 @@ class Reader:
|
|
|
838
876
|
|
|
839
877
|
def plot_to_file(
|
|
840
878
|
self,
|
|
841
|
-
start_s: float = None,
|
|
842
|
-
end_s: float = None,
|
|
879
|
+
start_s: Optional[float] = None,
|
|
880
|
+
end_s: Optional[float] = None,
|
|
843
881
|
width: int = 20,
|
|
844
882
|
height: int = 10,
|
|
845
|
-
) ->
|
|
883
|
+
) -> None:
|
|
846
884
|
"""creates (down-sampled) IV-Plot
|
|
847
885
|
-> omitting start- and end-time will use the whole duration
|
|
848
886
|
|
|
@@ -851,6 +889,9 @@ class Reader:
|
|
|
851
889
|
:param width: plot-width
|
|
852
890
|
:param height: plot-height
|
|
853
891
|
"""
|
|
892
|
+
if not isinstance(self._file_path, Path):
|
|
893
|
+
return
|
|
894
|
+
|
|
854
895
|
data = [self.generate_plot_data(start_s, end_s)]
|
|
855
896
|
|
|
856
897
|
start_str = f"{data[0]['start_s']:.3f}".replace(".", "s")
|
|
@@ -868,7 +909,7 @@ class Reader:
|
|
|
868
909
|
|
|
869
910
|
@staticmethod
|
|
870
911
|
def multiplot_to_file(
|
|
871
|
-
data:
|
|
912
|
+
data: list, plot_path: Path, width: int = 20, height: int = 10
|
|
872
913
|
) -> Optional[Path]:
|
|
873
914
|
"""creates (down-sampled) IV-Multi-Plot
|
|
874
915
|
|