shepherd-data 2022.9.3__py3-none-any.whl → 2023.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
shepherd_data/reader.py CHANGED
@@ -10,7 +10,8 @@ from datetime import datetime
10
10
  from itertools import product
11
11
  from pathlib import Path
12
12
  from typing import Dict
13
- from typing import NoReturn
13
+ from typing import Generator
14
+ from typing import List
14
15
  from typing import Optional
15
16
  from typing import Union
16
17
 
@@ -36,45 +37,47 @@ class Reader:
36
37
  """
37
38
 
38
39
  samples_per_buffer: int = 10_000
39
- samplerate_sps: int = 100_000
40
- sample_interval_ns: int = int(10**9 // samplerate_sps)
41
- sample_interval_s: float = 1 / samplerate_sps
42
-
43
- max_elements: int = (
44
- 40 * samplerate_sps
45
- ) # per iteration (40s full res, < 200 MB RAM use)
40
+ samplerate_sps_default: int = 100_000
46
41
 
47
42
  mode_dtype_dict = {
48
43
  "harvester": ["ivsample", "ivcurve", "isc_voc"],
49
44
  "emulator": ["ivsample"],
50
45
  }
51
46
 
52
- runtime_s: float = None
53
- file_size: int = None
54
- data_rate: float = None
55
-
56
- _logger: logging.Logger = logging.getLogger("SHPData.Reader")
57
-
58
- h5file: h5py.File = None
59
- ds_time: h5py.Dataset = None
60
- ds_voltage: h5py.Dataset = None
61
- ds_current: h5py.Dataset = None
62
- _cal: Dict[str, dict] = None
63
-
64
47
  def __init__(self, file_path: Optional[Path], verbose: Optional[bool] = True):
65
- self._skip_open = file_path is None # for access by writer-class
66
- if not self._skip_open:
67
- self._file_path = Path(file_path)
48
+ if not hasattr(self, "_file_path"):
49
+ self._file_path: Optional[Path] = None
50
+ if isinstance(file_path, (Path, str)):
51
+ self._file_path = Path(file_path)
52
+
53
+ if not hasattr(self, "_logger"):
54
+ self._logger: logging.Logger = logging.getLogger("SHPData.Reader")
68
55
  if verbose is not None:
69
56
  self._logger.setLevel(logging.INFO if verbose else logging.WARNING)
70
57
 
71
- def __enter__(self):
72
- if not self._skip_open:
58
+ self.samplerate_sps: int = 100_000
59
+ self.sample_interval_ns: int = int(10**9 // self.samplerate_sps)
60
+ self.sample_interval_s: float = 1 / self.samplerate_sps
61
+
62
+ self.max_elements: int = (
63
+ 40 * self.samplerate_sps
64
+ ) # per iteration (40s full res, < 200 MB RAM use)
65
+
66
+ # init stats
67
+ self.runtime_s: float = 0
68
+ self.file_size: int = 0
69
+ self.data_rate: float = 0
70
+
71
+ # open file (if not already done by writer)
72
+ if not hasattr(self, "h5file"):
73
+ if not isinstance(self._file_path, Path):
74
+ raise ValueError("Provide a valid Path-Object to Reader!")
73
75
  if not self._file_path.exists():
74
76
  raise FileNotFoundError(
75
77
  errno.ENOENT, os.strerror(errno.ENOENT), self._file_path.name
76
78
  )
77
- self.h5file = h5py.File(self._file_path, "r")
79
+
80
+ self.h5file = h5py.File(self._file_path, "r") # = readonly
78
81
 
79
82
  if self.is_valid():
80
83
  self._logger.info("File is available now")
@@ -83,22 +86,29 @@ class Reader:
83
86
  "File is faulty! Will try to open but there might be dragons"
84
87
  )
85
88
 
86
- self.ds_time = self.h5file["data"]["time"]
87
- self.ds_voltage = self.h5file["data"]["voltage"]
88
- self.ds_current = self.h5file["data"]["current"]
89
- self._cal = {
90
- "voltage": {
91
- "gain": self.ds_voltage.attrs["gain"],
92
- "offset": self.ds_voltage.attrs["offset"],
93
- },
94
- "current": {
95
- "gain": self.ds_current.attrs["gain"],
96
- "offset": self.ds_current.attrs["offset"],
97
- },
98
- }
89
+ if not isinstance(self.h5file, h5py.File):
90
+ raise TypeError("Type of opened file is not h5py.File")
91
+
92
+ self.ds_time: h5py.Dataset = self.h5file["data"]["time"]
93
+ self.ds_voltage: h5py.Dataset = self.h5file["data"]["voltage"]
94
+ self.ds_current: h5py.Dataset = self.h5file["data"]["current"]
95
+
96
+ if not hasattr(self, "_cal"):
97
+ self._cal: Dict[str, Dict[str, float]] = {
98
+ "voltage": {
99
+ "gain": self.ds_voltage.attrs["gain"],
100
+ "offset": self.ds_voltage.attrs["offset"],
101
+ },
102
+ "current": {
103
+ "gain": self.ds_current.attrs["gain"],
104
+ "offset": self.ds_current.attrs["offset"],
105
+ },
106
+ }
107
+
99
108
  self._refresh_file_stats()
100
109
 
101
- if not self._skip_open:
110
+ if file_path is not None:
111
+ # file opened by this reader
102
112
  self._logger.info(
103
113
  "Reading data from '%s'\n"
104
114
  "\t- runtime %s s\n"
@@ -113,10 +123,12 @@ class Reader:
113
123
  round(self.file_size / 2**20),
114
124
  round(self.data_rate / 2**10),
115
125
  )
126
+
127
+ def __enter__(self):
116
128
  return self
117
129
 
118
- def __exit__(self, *exc):
119
- if not self._skip_open:
130
+ def __exit__(self, *exc): # type: ignore
131
+ if isinstance(self._file_path, Path):
120
132
  self.h5file.close()
121
133
 
122
134
  def __repr__(self):
@@ -124,7 +136,7 @@ class Reader:
124
136
  self.get_metadata(minimal=True), default_flow_style=False, sort_keys=False
125
137
  )
126
138
 
127
- def _refresh_file_stats(self) -> NoReturn:
139
+ def _refresh_file_stats(self) -> None:
128
140
  """update internal states, helpful after resampling or other changes in data-group"""
129
141
  self.h5file.flush()
130
142
  if self.ds_time.shape[0] > 1:
@@ -132,12 +144,15 @@ class Reader:
132
144
  self.samplerate_sps = max(int(10**9 // self.sample_interval_ns), 1)
133
145
  self.sample_interval_s = 1.0 / self.samplerate_sps
134
146
  self.runtime_s = round(self.ds_time.shape[0] / self.samplerate_sps, 1)
135
- self.file_size = self._file_path.stat().st_size
147
+ if isinstance(self._file_path, Path):
148
+ self.file_size = self._file_path.stat().st_size
149
+ else:
150
+ self.file_size = 0
136
151
  self.data_rate = self.file_size / self.runtime_s if self.runtime_s > 0 else 0
137
152
 
138
153
  def read_buffers(
139
- self, start_n: int = 0, end_n: int = None, is_raw: bool = False
140
- ) -> tuple:
154
+ self, start_n: int = 0, end_n: Optional[int] = None, is_raw: bool = False
155
+ ) -> Generator[tuple, None, None]:
141
156
  """Generator that reads the specified range of buffers from the hdf5 file.
142
157
  can be configured on first call
143
158
 
@@ -183,7 +198,7 @@ class Reader:
183
198
  :return:
184
199
  """
185
200
  if "window_samples" in self.h5file["data"].attrs:
186
- return self.h5file["data"].attrs["window_samples"]
201
+ return int(self.h5file["data"].attrs["window_samples"])
187
202
  return 0
188
203
 
189
204
  def get_mode(self) -> str:
@@ -215,7 +230,7 @@ class Reader:
215
230
  "window_samples": self.get_window_samples(),
216
231
  }
217
232
 
218
- def data_timediffs(self) -> list:
233
+ def data_timediffs(self) -> List[float]:
219
234
  """calculate list of (unique) time-deltas between buffers [s]
220
235
  -> optimized version that only looks at the start of each buffer
221
236
 
@@ -348,7 +363,9 @@ class Reader:
348
363
  self._logger.warning("Hostname was not set (@Validator)")
349
364
  return True
350
365
 
351
- def get_metadata(self, node=None, minimal: bool = False) -> dict:
366
+ def get_metadata(
367
+ self, node: Union[h5py.Dataset, h5py.Group, None] = None, minimal: bool = False
368
+ ) -> Dict[str, dict]:
352
369
  """recursive FN to capture the structure of the file
353
370
 
354
371
  :param node: starting node, leave free to go through whole file
@@ -370,14 +387,18 @@ class Reader:
370
387
  }
371
388
  if node.name == "/data/time":
372
389
  metadata["_dataset_info"]["time_diffs_s"] = self.data_timediffs()
390
+ # TODO: already convert to str to calm the typechecker?
391
+ # or construct a pydantic-class
373
392
  elif "int" in str(node.dtype):
374
393
  metadata["_dataset_info"]["statistics"] = self._dset_statistics(node)
394
+ # TODO: put this into metadata["_dataset_statistics"] ??
375
395
  for attr in node.attrs.keys():
376
396
  attr_value = node.attrs[attr]
377
397
  if isinstance(attr_value, str):
378
398
  with contextlib.suppress(yaml.YAMLError):
379
399
  attr_value = yaml.safe_load(attr_value)
380
400
  elif "int" in str(type(attr_value)):
401
+ # TODO: why not isinstance? can it be list[int] other complex type?
381
402
  attr_value = int(attr_value)
382
403
  else:
383
404
  attr_value = float(attr_value)
@@ -396,22 +417,27 @@ class Reader:
396
417
 
397
418
  return metadata
398
419
 
399
- def save_metadata(self, node=None) -> dict:
420
+ def save_metadata(self, node: Union[h5py.Dataset, h5py.Group, None] = None) -> dict:
400
421
  """get structure of file and dump content to yaml-file with same name as original
401
422
 
402
423
  :param node: starting node, leave free to go through whole file
403
424
  :return: structure of that node with everything inside it
404
425
  """
405
- yml_path = Path(self._file_path).absolute().with_suffix(".yml")
406
- if yml_path.exists():
407
- self._logger.info("%s already exists, will skip", yml_path)
408
- return {}
409
- metadata = self.get_metadata(node) # {"h5root": self.get_metadata(self.h5file)}
410
- with open(yml_path, "w", encoding="utf-8-sig") as yfd:
411
- yaml.safe_dump(metadata, yfd, default_flow_style=False, sort_keys=False)
426
+ if isinstance(self._file_path, Path):
427
+ yml_path = Path(self._file_path).absolute().with_suffix(".yml")
428
+ if yml_path.exists():
429
+ self._logger.info("%s already exists, will skip", yml_path)
430
+ return {}
431
+ metadata = self.get_metadata(
432
+ node
433
+ ) # {"h5root": self.get_metadata(self.h5file)}
434
+ with open(yml_path, "w", encoding="utf-8-sig") as yfd:
435
+ yaml.safe_dump(metadata, yfd, default_flow_style=False, sort_keys=False)
436
+ else:
437
+ metadata = {}
412
438
  return metadata
413
439
 
414
- def __getitem__(self, key):
440
+ def __getitem__(self, key: str):
415
441
  """returns attribute or (if none found) a handle for a group or dataset (if found)
416
442
 
417
443
  :param key: attribute, group, dataset
@@ -453,7 +479,9 @@ class Reader:
453
479
  energy_ws = [_calc_energy(i) for i in job_iter]
454
480
  return float(sum(energy_ws))
455
481
 
456
- def _dset_statistics(self, dset: h5py.Dataset, cal: dict = None) -> dict:
482
+ def _dset_statistics(
483
+ self, dset: h5py.Dataset, cal: Optional[dict] = None
484
+ ) -> Dict[str, float]:
457
485
  """some basic stats for a provided dataset
458
486
  :param dset: dataset to evaluate
459
487
  :param cal: calibration (if wanted)
@@ -495,7 +523,8 @@ class Reader:
495
523
  if len(stats_list) < 1:
496
524
  return {}
497
525
  stats_df = pd.DataFrame(stats_list)
498
- stats = { # TODO: wrong calculation for ndim-datasets with n>1
526
+ stats: Dict[str, float] = {
527
+ # TODO: wrong calculation for ndim-datasets with n>1
499
528
  "mean": float(stats_df.loc[:, "mean"].mean()),
500
529
  "min": float(stats_df.loc[:, "min"].min()),
501
530
  "max": float(stats_df.loc[:, "max"].max()),
@@ -514,6 +543,8 @@ class Reader:
514
543
  if h5_group["time"].shape[0] < 1:
515
544
  self._logger.warning("%s is empty, no csv generated", h5_group.name)
516
545
  return 0
546
+ if not isinstance(self._file_path, Path):
547
+ return 0
517
548
  csv_path = self._file_path.with_suffix(f".{h5_group.name.strip('/')}.csv")
518
549
  if csv_path.exists():
519
550
  self._logger.warning("%s already exists, will skip", csv_path)
@@ -555,6 +586,8 @@ class Reader:
555
586
  if h5_group["time"].shape[0] < 1:
556
587
  self._logger.warning("%s is empty, no log generated", h5_group.name)
557
588
  return 0
589
+ if not isinstance(self._file_path, Path):
590
+ return 0
558
591
  log_path = self._file_path.with_suffix(f".{h5_group.name.strip('/')}.log")
559
592
  if log_path.exists():
560
593
  self._logger.warning("%s already exists, will skip", log_path)
@@ -585,7 +618,7 @@ class Reader:
585
618
  data_src: h5py.Dataset,
586
619
  data_dst: Union[None, h5py.Dataset, np.ndarray],
587
620
  start_n: int = 0,
588
- end_n: int = None,
621
+ end_n: Optional[int] = None,
589
622
  ds_factor: float = 5,
590
623
  is_time: bool = False,
591
624
  ) -> Union[h5py.Dataset, np.ndarray]:
@@ -603,12 +636,13 @@ class Reader:
603
636
  self._logger.warning("Downsampling-Function was not written for IVCurves")
604
637
  ds_factor = max(1, math.floor(ds_factor))
605
638
 
606
- if end_n is None:
607
- end_n = data_src.shape[0]
639
+ if isinstance(end_n, (int, float)):
640
+ _end_n = min(data_src.shape[0], round(end_n))
608
641
  else:
609
- end_n = min(data_src.shape[0], round(end_n))
610
- start_n = min(end_n, round(start_n))
611
- data_len = end_n - start_n # TODO: one-off to calculation below ?
642
+ _end_n = data_src.shape[0]
643
+
644
+ start_n = min(_end_n, round(start_n))
645
+ data_len = _end_n - start_n # TODO: one-off to calculation below ?
612
646
  if data_len == 0:
613
647
  self._logger.warning("downsampling failed because of data_len = 0")
614
648
  iblock_len = min(self.max_elements, data_len)
@@ -664,7 +698,7 @@ class Reader:
664
698
  data_src: h5py.Dataset,
665
699
  data_dst: Union[None, h5py.Dataset, np.ndarray],
666
700
  start_n: int = 0,
667
- end_n: int = None,
701
+ end_n: Optional[int] = None,
668
702
  samplerate_dst: float = 1000,
669
703
  is_time: bool = False,
670
704
  ) -> Union[h5py.Dataset, np.ndarray]:
@@ -683,12 +717,13 @@ class Reader:
683
717
  if self.get_datatype() == "ivcurve":
684
718
  self._logger.warning("Resampling-Function was not written for IVCurves")
685
719
 
686
- if end_n is None:
687
- end_n = data_src.shape[0]
720
+ if isinstance(end_n, (int, float)):
721
+ _end_n = min(data_src.shape[0], round(end_n))
688
722
  else:
689
- end_n = min(data_src.shape[0], round(end_n))
690
- start_n = min(end_n, round(start_n))
691
- data_len = end_n - start_n
723
+ _end_n = data_src.shape[0]
724
+
725
+ start_n = min(_end_n, round(start_n))
726
+ data_len = _end_n - start_n
692
727
  if data_len == 0:
693
728
  self._logger.warning("resampling failed because of data_len = 0")
694
729
  fs_ratio = samplerate_dst / self.samplerate_sps
@@ -762,7 +797,10 @@ class Reader:
762
797
  return data_dst
763
798
 
764
799
  def generate_plot_data(
765
- self, start_s: float = None, end_s: float = None, relative_ts: bool = True
800
+ self,
801
+ start_s: Optional[float] = None,
802
+ end_s: Optional[float] = None,
803
+ relative_ts: bool = True,
766
804
  ) -> Dict:
767
805
  """provides down-sampled iv-data that can be feed into plot_to_file()
768
806
 
@@ -838,11 +876,11 @@ class Reader:
838
876
 
839
877
  def plot_to_file(
840
878
  self,
841
- start_s: float = None,
842
- end_s: float = None,
879
+ start_s: Optional[float] = None,
880
+ end_s: Optional[float] = None,
843
881
  width: int = 20,
844
882
  height: int = 10,
845
- ) -> NoReturn:
883
+ ) -> None:
846
884
  """creates (down-sampled) IV-Plot
847
885
  -> omitting start- and end-time will use the whole duration
848
886
 
@@ -851,6 +889,9 @@ class Reader:
851
889
  :param width: plot-width
852
890
  :param height: plot-height
853
891
  """
892
+ if not isinstance(self._file_path, Path):
893
+ return
894
+
854
895
  data = [self.generate_plot_data(start_s, end_s)]
855
896
 
856
897
  start_str = f"{data[0]['start_s']:.3f}".replace(".", "s")
@@ -868,7 +909,7 @@ class Reader:
868
909
 
869
910
  @staticmethod
870
911
  def multiplot_to_file(
871
- data: Union[list], plot_path: Path, width: int = 20, height: int = 10
912
+ data: list, plot_path: Path, width: int = 20, height: int = 10
872
913
  ) -> Optional[Path]:
873
914
  """creates (down-sampled) IV-Multi-Plot
874
915