simcats-datasets 2.4.0__tar.gz → 2.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. simcats_datasets-2.6.0/PKG-INFO +163 -0
  2. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/README.md +1 -1
  3. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/pyproject.toml +5 -5
  4. simcats_datasets-2.6.0/simcats_datasets/__init__.py +2 -0
  5. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/generation/_create_dataset.py +118 -30
  6. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/generation/_create_simulated_dataset.py +168 -69
  7. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/loading/_load_dataset.py +67 -14
  8. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/loading/load_ground_truth.py +219 -1
  9. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/loading/pytorch.py +78 -29
  10. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/clip_line_to_rectangle.py +15 -4
  11. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/convert_lines.py +34 -0
  12. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/data_preprocessing.py +112 -1
  13. simcats_datasets-2.6.0/simcats_datasets/support_functions/get_coulomb_oscillation_area_boundaries.py +471 -0
  14. simcats_datasets-2.6.0/simcats_datasets/support_functions/metadata_utils.py +62 -0
  15. simcats_datasets-2.6.0/simcats_datasets/support_functions/pytorch_format_output.py +169 -0
  16. simcats_datasets-2.6.0/simcats_datasets.egg-info/PKG-INFO +163 -0
  17. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets.egg-info/SOURCES.txt +2 -0
  18. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets.egg-info/requires.txt +1 -1
  19. simcats_datasets-2.4.0/PKG-INFO +0 -837
  20. simcats_datasets-2.4.0/simcats_datasets/__init__.py +0 -2
  21. simcats_datasets-2.4.0/simcats_datasets/support_functions/pytorch_format_output.py +0 -170
  22. simcats_datasets-2.4.0/simcats_datasets.egg-info/PKG-INFO +0 -837
  23. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/LICENSE +0 -0
  24. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/setup.cfg +0 -0
  25. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/setup.py +0 -0
  26. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/generation/__init__.py +0 -0
  27. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/loading/__init__.py +0 -0
  28. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/__init__.py +0 -0
  29. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/_json_encoders.py +0 -0
  30. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/get_lead_transition_labels.py +0 -0
  31. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets.egg-info/dependency_links.txt +0 -0
  32. {simcats_datasets-2.4.0 → simcats_datasets-2.6.0}/simcats_datasets.egg-info/top_level.txt +0 -0
@@ -0,0 +1,163 @@
+ Metadata-Version: 2.4
+ Name: simcats-datasets
+ Version: 2.6.0
+ Summary: SimCATS-Datasets is a Python package that simplifies the creation and loading of SimCATS datasets.
+ Author-email: Fabian Hader <f.hader@fz-juelich.de>, Fabian Fuchs <f.fuchs@fz-juelich.de>, Karin Havemann <k.havemann@fz-juelich.de>, Sarah Fleitmann <s.fleitmann@fz-juelich.de>, Jan Vogelbruch <j.vogelbruch@fz-juelich.de>
+ License-Expression: GPL-3.0-or-later
+ Project-URL: homepage, https://github.com/f-hader/SimCATS-Datasets
+ Project-URL: documentation, https://simcats-datasets.readthedocs.io
+ Project-URL: source, https://github.com/f-hader/SimCATS-Datasets
+ Project-URL: tracker, https://github.com/f-hader/SimCATS-Datasets/issues
+ Classifier: Development Status :: 5 - Production/Stable
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.7
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Topic :: Scientific/Engineering
+ Classifier: Typing :: Typed
+ Requires-Python: >=3.7
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: bezier
+ Requires-Dist: bm3d
+ Requires-Dist: h5py
+ Requires-Dist: hdf5storage
+ Requires-Dist: numpy
+ Requires-Dist: opencv-python
+ Requires-Dist: pandas
+ Requires-Dist: parallelbar
+ Requires-Dist: parse
+ Requires-Dist: scikit-image
+ Requires-Dist: simcats>=2.0.0
+ Requires-Dist: torch
+ Requires-Dist: tqdm
+ Requires-Dist: xarray
+ Dynamic: license-file
+
+ <h1 align="center">
+   <img src="https://raw.githubusercontent.com/f-hader/SimCATS-Datasets/main/SimCATS-Datasets_symbol.svg" alt="SimCATS logo">
+   <br>
+ </h1>
+
+ <div align="center">
+   <a href="https://github.com/f-hader/SimCATS-Datasets/blob/main/LICENSE">
+     <img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPLv3"/>
+   </a>
+   <a href="https://pypi.org/project/simcats-datasets/">
+     <img src="https://img.shields.io/pypi/v/simcats-datasets.svg" alt="PyPi Latest Release"/>
+   </a>
+   <a href="https://simcats-datasets.readthedocs.io/en/latest/">
+     <img src="https://img.shields.io/readthedocs/simcats-datasets" alt="Read the Docs"/>
+   </a>
+   <a href="https://doi.org/10.1109/TQE.2024.3445967">
+     <img src="https://img.shields.io/badge/DOI (SimCATS Paper)-10.1109/TQE.2024.3445967-007ec6.svg" alt="DOI Paper"/>
+   </a>
+   <a href="https://doi.org/10.5281/zenodo.13862231">
+     <img src="https://img.shields.io/badge/DOI (Code)-10.5281/zenodo.13862231-007ec6.svg" alt="DOI Code"/>
+   </a>
+ </div>
+
+ # SimCATS-Datasets
+
+ `SimCATS-Datasets` is a Python package that simplifies the creation and loading of `SimCATS` datasets. Please have a look at
+ [this repository](https://github.com/f-hader/SimCATS) regarding `SimCATS` itself.
+
+ ## Installation
+
+ The framework supports Python versions 3.7 - 3.11 and installs via pip:
+ ```
+ pip install simcats-datasets
+ ```
+
+ Alternatively, the `SimCATS-Datasets` package can be installed by cloning the GitHub repository, navigating to the
+ folder containing the `setup.py` file, and executing
+ ```
+ pip install .
+ ```
+
+ For installation in development/editable mode, use the option `-e`.
+
+ <!-- start sec:documentation -->
+ ## Documentation
+
+ The official documentation is hosted on [ReadtheDocs](https://simcats-datasets.readthedocs.io) but can also be built
+ locally. To do this, first install the packages `sphinx`, `sphinx-rtd-theme`, `sphinx-autoapi`, `myst-nb`, and
+ `jupytext` with
+
+ ```
+ pip install sphinx sphinx-rtd-theme sphinx-autoapi myst-nb jupytext
+ ```
+
+ and then, in the `docs` folder, execute the following command:
+
+ ```
+ .\make html
+ ```
+
+ To view the generated HTML documentation, open the file `docs\build\html\index.html`.
+ <!-- end sec:documentation -->
+
+
+ ## Loading Datasets
+
+ Datasets created with `SimCATS-Datasets` are stored in HDF5 files. These datasets can be loaded using the function
+ `load_dataset` from `simcats_datasets.loading`.
+
+ The return value of the function is a named tuple. The fields can be accessed by their name or index. As with normal
+ tuples, it is also possible to unpack the returned fields directly into separate variables. The available fields
+ depend on which data was specified to be loaded. Please look at the docstring for further information.
+
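A minimal sketch of this loading pattern (the selection flags `load_csds` and `load_metadata` below are illustrative assumptions, not confirmed parameter names; the docstring of `load_dataset` lists the actual parameters):

```python
from simcats_datasets.loading import load_dataset

# Hypothetical selection flags: choose which fields to load.
data = load_dataset("path/to/dataset.h5", load_csds=True, load_metadata=True)

# Fields of the returned named tuple are accessible by name or index ...
csds = data.csds
metadata = data.metadata

# ... or can be unpacked directly into separate variables, like any tuple.
csds, metadata = load_dataset("path/to/dataset.h5", load_csds=True, load_metadata=True)
```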
+ Additionally, `SimCATS-Datasets` offers a PyTorch dataset (see `torch.utils.data.Dataset`) implementation called
+ `SimcatsDataset`. It allows the direct use of `SimCATS` datasets for machine learning purposes with PyTorch and can be
+ imported from `simcats_datasets.loading.pytorch`.
+
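A sketch of how this fits into a training loop (the `SimcatsDataset` constructor arguments here are assumptions; the class docstring documents the supported options):

```python
from torch.utils.data import DataLoader
from simcats_datasets.loading.pytorch import SimcatsDataset

# Hypothetical constructor call: wrap an existing HDF5 dataset file.
dataset = SimcatsDataset("path/to/dataset.h5")

# SimcatsDataset behaves like any torch.utils.data.Dataset, so the usual
# PyTorch tooling applies directly.
loader = DataLoader(dataset, batch_size=32, shuffle=True)
for batch in loader:
    pass  # training step goes here
```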
+ ## Creating Datasets
+
+ To create a simulated dataset, import `create_simulated_dataset` from `simcats_datasets.generation`. This function
+ makes it easy to create simulated CSDs together with their ground truth. It can also append further CSDs to an
+ already existing dataset, which it detects automatically. For the function's usage, please have a look at its
+ docstring; a short sketch follows the warning below.
+
+ | :warning: WARNING |
+ |:------------------|
+ | The functionalities for creating and extending simulated datasets using SimCATS expect that the SimCATS simulation uses the IdealCSDInterface implementation called IdealCSDGeometric. Other implementations might cause problems because the expected information for creating labeled lines etc. might be unavailable. |
+
+
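As a minimal sketch (the keyword name `dataset_path` mirrors the `create_dataset` signature shown in the code diff further below; the remaining arguments, such as the SimCATS simulation configuration and the number of CSDs to simulate, are defined in the docstring and omitted here):

```python
from simcats_datasets.generation import create_simulated_dataset

# Creates the HDF5 dataset file, or extends it if it already exists
# (existing datasets are detected automatically).
create_simulated_dataset(dataset_path="path/to/dataset.h5")
```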
+ Alternatively to using `create_simulated_dataset` and directly simulating a dataset with `SimCATS`, it is also possible
+ to create a `SimCATS-Dataset` compatible dataset from existing data (for example, experimentally measured data or data
+ simulated with other frameworks). This can be done using `create_dataset` from `simcats_datasets.generation`, as shown
+ in the sketch below.
+
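The full signature of `create_dataset` appears in the code diff further below; a small example for wrapping existing measurements (the file path and array contents are placeholders):

```python
import numpy as np
from simcats_datasets.generation import create_dataset

# Three 100x100 arrays standing in for experimentally measured CSDs.
csds = [np.random.rand(100, 100).astype(np.float32) for _ in range(3)]
metadata = [{"source": "experiment", "index": i} for i in range(3)]

# Writes a simcats_datasets v2 HDF5 file, or appends if the file exists.
# Note: a dataset holds either csds or sensor_scans, never both.
create_dataset(
    dataset_path="path/to/dataset.h5",
    csds=csds,
    metadata=metadata,
)
```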
+ ## Citations
+
+ ```bibtex
+ @article{hader2024simcats,
+   author={Hader, Fabian and Fleitmann, Sarah and Vogelbruch, Jan and Geck, Lotte and Waasen, Stefan van},
+   journal={IEEE Transactions on Quantum Engineering},
+   title={Simulation of Charge Stability Diagrams for Automated Tuning Solutions (SimCATS)},
+   year={2024},
+   volume={5},
+   pages={1-14},
+   doi={10.1109/TQE.2024.3445967}
+ }
+ ```
+
+ ## License, CLA, and Copyright
+
+ [![GPLv3][gplv3-shield]][gplv3]
+
+ This work is licensed under the
+ [GNU General Public License v3][gplv3].
+
+ [![GPLv3][gplv3-image]][gplv3]
+
+ [gplv3]: https://www.gnu.org/licenses/gpl-3.0.html
+ [gplv3-image]: https://www.gnu.org/graphics/gplv3-127x51.png
+ [gplv3-shield]: https://img.shields.io/badge/License-GPLv3-blue.svg
+
+ Contributions must follow the Contributor License Agreement. For more information, see the [CONTRIBUTING.md](https://github.com/f-hader/SimCATS-Datasets/blob/main/CONTRIBUTING.md) file at the top of the GitHub repository.
+
+ Copyright © 2026 Peter Grünberg Institute - Integrated Computing Architectures (ICA / PGI-4), Forschungszentrum Jülich GmbH
@@ -120,4 +120,4 @@ This work is licensed under a
  
  Contributions must follow the Contributor License Agreement. For more information, see the [CONTRIBUTING.md](https://github.com/f-hader/SimCATS-Datasets/blob/main/CONTRIBUTING.md) file at the top of the GitHub repository.
  
- Copyright © 2024 Forschungszentrum Jülich GmbH - Central Institute of Engineering, Electronics and Analytics (ZEA) - Electronic Systems (ZEA-2)
+ Copyright © 2026 Peter Grünberg Institute - Integrated Computing Architectures (ICA / PGI-4), Forschungszentrum Jülich GmbH
@@ -1,11 +1,12 @@
  [build-system]
- requires = ["setuptools>=61.0"]
+ requires = ["setuptools>=77.0"]
  build-backend = "setuptools.build_meta"
  
  [project]
  name = "simcats-datasets"
- version = "2.4.0" # also change in docs/source/conf.py and __init__
- license = { file="LICENSE" }
+ version = "2.6.0" # also change in docs/source/conf.py and __init__
+ license = "GPL-3.0-or-later"
+ license-files = ["LICENSE"]
  authors = [
      { name="Fabian Hader", email="f.hader@fz-juelich.de" },
      { name="Fabian Fuchs", email="f.fuchs@fz-juelich.de" },
@@ -19,7 +20,6 @@ description = """\
  readme = "README.md"
  requires-python = ">=3.7"
  classifiers = [
-     'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)',
      'Development Status :: 5 - Production/Stable',
      'Intended Audience :: Science/Research',
      'Programming Language :: Python',
@@ -43,7 +43,7 @@ dependencies = [
      "parallelbar",
      "parse",
      "scikit-image",
-     "simcats>=1.2.0",
+     "simcats>=2.0.0",
      "torch",
      "tqdm",
      "xarray"
@@ -0,0 +1,2 @@
+ __all__ = []
+ __version__ = "2.6.0"
@@ -16,10 +16,13 @@ __all__ = []
  
  
  def create_dataset(dataset_path: str,
-                    csds: List[np.ndarray],
+                    csds: Optional[List[np.ndarray]] = None,
+                    sensor_scans: Optional[List[np.ndarray]] = None,
                     occupations: Optional[List[np.ndarray]] = None,
                     tct_masks: Optional[List[np.ndarray]] = None,
                     ct_by_dot_masks: Optional[List[np.ndarray]] = None,
+                    sensor_regime_masks: Optional[List[np.ndarray]] = None,
+                    sensor_peak_center_masks: Optional[List[np.ndarray]] = None,
                     line_coordinates: Optional[List[np.ndarray]] = None,
                     line_labels: Optional[List[dict]] = None,
                     metadata: Optional[List[dict]] = None,
@@ -27,60 +30,103 @@ def create_dataset(dataset_path: str,
                     max_len_line_labels_chunk: Optional[int] = None,
                     max_len_metadata_chunk: Optional[int] = None,
                     dtype_csd: np.dtype = np.float32,
+                    dtype_sensor_scan: np.dtype = np.float32,
                     dtype_occ: np.dtype = np.float32,
                     dtype_tct: np.dtype = np.uint8,
                     dtype_ct_by_dot: np.dtype = np.uint8,
+                    dtype_sensor_regime_masks: np.dtype = np.uint8,
+                    dtype_sensor_peak_center_masks: np.dtype = np.uint8,
                     dtype_line_coordinates: np.dtype = np.float32) -> None:
      """Function for creating simcats_datasets v2 format datasets from given data.
  
      Args:
          dataset_path: The path where the new (v2) HDF5 dataset will be stored.
-         csds: The list of CSDs to use for creating the dataset.
+         csds: The list of CSDs to use for creating the dataset. A dataset can have either CSDs or sensor scans, but
+             never both. Default is None.
+         sensor_scans: The list of sensor scans to use for creating the dataset. A dataset can have either CSDs or sensor
+             scans, but never both. Default is None.
          occupations: List of occupations to use for creating the dataset. Defaults to None.
          tct_masks: List of TCT masks to use for creating the dataset. Defaults to None.
          ct_by_dot_masks: List of CT by dot masks to use for creating the dataset. Defaults to None.
+         sensor_regime_masks: List of sensor regime masks to use for creating the dataset. Defaults to None.
+         sensor_peak_center_masks: List of sensor peak center masks to use for creating the dataset. Defaults to None.
          line_coordinates: List of line coordinates to use for creating the dataset. Defaults to None.
          line_labels: List of line labels to use for creating the dataset. Defaults to None.
          metadata: List of metadata to use for creating the dataset. Defaults to None.
          max_len_line_coordinates_chunk: The expected maximal length for line coordinates in number of float values (each
-             line requires 4 floats). If None, it is set to the largest value of the CSD shape. Default is None.
+             line requires 4 floats). If None, it is set to the largest value of the CSD (or sensor scan) shape. Default
+             is None.
          max_len_line_labels_chunk: The expected maximal length for line labels in number of uint8/char values (each line
              label, encoded as utf-8 json, should require at most 80 chars). If None, it is set to the largest value of
-             the CSD shape * 20 (matching with allowed number of line coords). Default is None.
+             the CSD (or sensor scan) shape * 20 (matching with allowed number of line coords). Default is None.
          max_len_metadata_chunk: The expected maximal length for metadata in number of uint8/char values (each metadata
              dict, encoded as utf-8 json, should require at most 8000 chars, expected rather something like 4000, but
              could get larger for dot jumps metadata of high resolution scans). If None, it is set to 8000. Default is
              None.
          dtype_csd: Specifies the dtype to be used for saving CSDs. Default is np.float32.
+         dtype_sensor_scan: Specifies the dtype to be used for saving sensor scans. Default is np.float32.
          dtype_occ: Specifies the dtype to be used for saving Occupations. Default is np.float32.
          dtype_tct: Specifies the dtype to be used for saving TCTs. Default is np.uint8.
          dtype_ct_by_dot: Specifies the dtype to be used for saving CT by dot masks. Default is np.uint8.
+         dtype_sensor_regime_masks: Specifies the dtype to be used for saving sensor regime masks. Default is np.uint8.
+         dtype_sensor_peak_center_masks: Specifies the dtype to be used for saving sensor peak center masks. Default is
+             np.uint8.
          dtype_line_coordinates: Specifies the dtype to be used for saving line coordinates. Default is np.float32.
      """
      # Create path where the dataset will be saved (if folder doesn't exist already)
      Path(dirname(dataset_path)).mkdir(parents=True, exist_ok=True)
  
+     # check if the dataset to be created is a csd or sensor_scan dataset
+     if csds is not None and sensor_scans is None:
+         csd_dataset = True
+     elif csds is None and sensor_scans is not None:
+         csd_dataset = False
+     else:
+         raise ValueError("A dataset can contain either CSDs or sensor scans but never both! Exactly one of the two has "
+                          "to be None.")
+
      with h5py.File(dataset_path, "a") as hdf5_file:
          # get the number of total ids. This is especially required if a large dataset is loaded and saved step by step
-         num_ids = len(csds)
+         if csd_dataset:
+             num_ids = len(csds)
+         else:
+             num_ids = len(sensor_scans)
  
-         # process CSDs
-         # save an example CSD to get shape and dtype
-         temp_csd = csds[0].copy()
-         # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to load
-         # one image at a time during training)
-         ds = hdf5_file.require_dataset(name='csds', shape=(0, *temp_csd.shape), dtype=dtype_csd,
-                                        maxshape=(None, *temp_csd.shape))
+         # get a temp copy of a csd or sensor scan (to get the shape) and retrieve the corresponding HDF5 dataset
+         if csd_dataset:
+             # process CSDs
+             # save an example CSD to get shape and dtype
+             temp_data = csds[0].copy()
+             # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
+             # load one image at a time during training)
+             ds = hdf5_file.require_dataset(name='csds',
+                                            shape=(0, *temp_data.shape),
+                                            dtype=dtype_csd,
+                                            maxshape=(None, *temp_data.shape))
+         else:
+             # process sensor scans
+             # save an example sensor scan to get shape and dtype
+             temp_data = sensor_scans[0].copy()
+             # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
+             # load one image at a time during training)
+             ds = hdf5_file.require_dataset(name='sensor_scans',
+                                            shape=(0, *temp_data.shape),
+                                            dtype=dtype_sensor_scan,
+                                            maxshape=(None, *temp_data.shape))
          # determine index offset if there is already data in the dataset
          id_offset = ds.shape[0]
          # resize datasets to fit new data
          ds.resize(ds.shape[0] + num_ids, axis=0)
-         ds[id_offset:] = np.array(csds).astype(dtype_csd)
+         # Add new CSDs or sensor scans to the dataset
+         if csd_dataset:
+             ds[id_offset:] = np.array(csds).astype(dtype_csd)
+         else:
+             ds[id_offset:] = np.array(sensor_scans).astype(dtype_sensor_scan)
          if occupations is not None:
              if len(occupations) != num_ids:
                  raise ValueError(
-                     f"Number of new occupation arrays ({len(occupations)}) does not match the number of new CSDs "
-                     f"({num_ids}).")
+                     f"Number of new occupation arrays ({len(occupations)}) does not match the number of new CSDs or "
+                     f"sensor scans ({num_ids}).")
              # process Occupations
              # save an example occ to get shape
              temp_occ = occupations[0].copy()
@@ -91,15 +137,15 @@ def create_dataset(dataset_path: str,
              if ds.shape[0] != id_offset:
                  raise ValueError(
                      f"Number of already stored occupation arrays ({ds.shape[0]}) does not match the number of already "
-                     f"stored CSDs ({id_offset}).")
+                     f"stored CSDs or sensor scans ({id_offset}).")
              # resize datasets to fit new data
              ds.resize(ds.shape[0] + num_ids, axis=0)
              ds[id_offset:] = np.array(occupations).astype(dtype_occ)
          if tct_masks is not None:
              if len(tct_masks) != num_ids:
                  raise ValueError(
-                     f"Number of new TCT mask arrays ({len(tct_masks)}) does not match the number of new CSDs "
-                     f"({num_ids}).")
+                     f"Number of new TCT mask arrays ({len(tct_masks)}) does not match the number of new CSDs or sensor "
+                     f"scans ({num_ids}).")
              # process tct masks
              # save an example tct to get shape and dtype
              temp_tct = tct_masks[0].copy()
@@ -110,7 +156,7 @@ def create_dataset(dataset_path: str,
              if ds.shape[0] != id_offset:
                  raise ValueError(
                      f"Number of already stored TCT mask arrays ({ds.shape[0]}) does not match the number of already "
-                     f"stored CSDs ({id_offset}).")
+                     f"stored CSDs or sensor scans ({id_offset}).")
              # resize datasets to fit new data
              ds.resize(ds.shape[0] + num_ids, axis=0)
              ds[id_offset:] = np.array(tct_masks).astype(dtype_tct)
@@ -118,7 +164,7 @@ def create_dataset(dataset_path: str,
              if len(ct_by_dot_masks) != num_ids:
                  raise ValueError(
                      f"Number of new CT by dot mask arrays ({len(ct_by_dot_masks)}) does not match the number of new "
-                     f"CSDs ({num_ids}).")
+                     f"CSDs or sensor scans ({num_ids}).")
              # process tct masks
              # save an example tct to get shape and dtype
              temp_ct_by_dot = ct_by_dot_masks[0].copy()
@@ -129,19 +175,60 @@ def create_dataset(dataset_path: str,
              if ds.shape[0] != id_offset:
                  raise ValueError(
                      f"Number of already stored CT by dot mask arrays ({ds.shape[0]}) does not match the number of "
-                     f"already stored CSDs ({id_offset}).")
+                     f"already stored CSDs or sensor scans ({id_offset}).")
              # resize datasets to fit new data
              ds.resize(ds.shape[0] + num_ids, axis=0)
              ds[id_offset:] = np.array(ct_by_dot_masks).astype(dtype_tct)
+         if sensor_regime_masks is not None:
+             if len(sensor_regime_masks) != num_ids:
+                 raise ValueError(
+                     f"Number of new sensor regime mask arrays ({len(sensor_regime_masks)}) does not match the number "
+                     f"of new CSDs or sensor scans ({num_ids}).")
+             # process sensor regime masks
+             # save an example sensor regime mask to get shape and dtype
+             temp_sensor_regime_mask = sensor_regime_masks[0].copy()
+             # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
+             # load one image at a time during training)
+             ds = hdf5_file.require_dataset(name='sensor_regime_masks', shape=(0, *temp_sensor_regime_mask.shape),
+                                            dtype=dtype_sensor_regime_masks,
+                                            maxshape=(None, *temp_sensor_regime_mask.shape))
+             if ds.shape[0] != id_offset:
+                 raise ValueError(
+                     f"Number of already stored sensor regime mask arrays ({ds.shape[0]}) does not match the number of "
+                     f"already stored CSDs or sensor scans ({id_offset}).")
+             # resize datasets to fit new data
+             ds.resize(ds.shape[0] + num_ids, axis=0)
+             ds[id_offset:] = np.array(sensor_regime_masks).astype(dtype_sensor_regime_masks)
+         if sensor_peak_center_masks is not None:
+             if len(sensor_peak_center_masks) != num_ids:
+                 raise ValueError(
+                     f"Number of new sensor peak center mask arrays ({len(sensor_peak_center_masks)}) does not match "
+                     f"the number of new CSDs or sensor scans ({num_ids}).")
+             # process sensor peak center masks
+             # save an example sensor peak center mask to get shape and dtype
+             temp_sensor_peak_center_mask = sensor_peak_center_masks[0].copy()
+             # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
+             # load one image at a time during training)
+             ds = hdf5_file.require_dataset(name='sensor_peak_center_masks',
+                                            shape=(0, *temp_sensor_peak_center_mask.shape),
+                                            dtype=dtype_sensor_peak_center_masks,
+                                            maxshape=(None, *temp_sensor_peak_center_mask.shape))
+             if ds.shape[0] != id_offset:
+                 raise ValueError(
+                     f"Number of already stored sensor peak center mask arrays ({ds.shape[0]}) does not match the "
+                     f"number of already stored CSDs or sensor scans ({id_offset}).")
+             # resize datasets to fit new data
+             ds.resize(ds.shape[0] + num_ids, axis=0)
+             ds[id_offset:] = np.array(sensor_peak_center_masks).astype(dtype_sensor_peak_center_masks)
          if line_coordinates is not None:
              if len(line_coordinates) != num_ids:
                  raise ValueError(
                      f"Number of new line coordinates ({len(line_coordinates)}) does not match the number of new "
-                     f"CSDs ({num_ids}).")
+                     f"CSDs or sensor scans ({num_ids}).")
              # retrieve fixed length for chunks
              if max_len_line_coordinates_chunk is None:
                  # calculate max expected length (max_number_of_lines * 4 entries, max number estimated as max(shape)/4)
-                 max_len = max(temp_csd.shape)
+                 max_len = max(temp_data.shape)
              else:
                  max_len = max_len_line_coordinates_chunk
              # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
@@ -151,7 +238,7 @@ def create_dataset(dataset_path: str,
              if ds.shape[0] != id_offset:
                  raise ValueError(
                      f"Number of already stored line coordinates ({ds.shape[0]}) does not match the number of already "
-                     f"stored CSDs ({id_offset}).")
+                     f"stored CSDs or sensor scans ({id_offset}).")
              # resize datasets to fit new data
              ds.resize(ds.shape[0] + num_ids, axis=0)
              # process line coordinates
@@ -163,13 +250,13 @@ def create_dataset(dataset_path: str,
          if line_labels is not None:
              if len(line_labels) != num_ids:
                  raise ValueError(
-                     f"Number of new line labels ({len(line_labels)}) does not match the number of new CSDs "
-                     f"({num_ids}).")
+                     f"Number of new line labels ({len(line_labels)}) does not match the number of new CSDs or sensor "
+                     f"scans ({num_ids}).")
              # retrieve fixed length for chunks
              if max_len_line_labels_chunk is None:
                  # calculate max expected length (max_number_of_lines * 80 uint8 numbers, max number estimated as
                  # max(shape)/4)
-                 max_len = max(temp_csd.shape) * 20
+                 max_len = max(temp_data.shape) * 20
              else:
                  max_len = max_len_line_labels_chunk
              # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
@@ -179,7 +266,7 @@ def create_dataset(dataset_path: str,
              if ds.shape[0] != id_offset:
                  raise ValueError(
                      f"Number of already stored line labels ({ds.shape[0]}) does not match the number of already stored "
-                     f"CSDs ({id_offset}).")
+                     f"CSDs or sensor scans ({id_offset}).")
              # resize datasets to fit new data
              ds.resize(ds.shape[0] + num_ids, axis=0)
              # process line labels
@@ -193,7 +280,8 @@ def create_dataset(dataset_path: str,
          if metadata is not None:
              if len(metadata) != num_ids:
                  raise ValueError(
-                     f"Number of new metadata ({len(metadata)}) does not match the number of new CSDs ({num_ids}).")
+                     f"Number of new metadata ({len(metadata)}) does not match the number of new CSDs or sensor scans "
+                     f"({num_ids}).")
              # retrieve fixed length for chunks
              if max_len_metadata_chunk is None:
                  # set len to 8000 uint8 numbers, that should already include some extra safety (expected smth. like
@@ -208,7 +296,7 @@ def create_dataset(dataset_path: str,
              if ds.shape[0] != id_offset:
                  raise ValueError(
                      f"Number of already stored metadata ({ds.shape[0]}) does not match the number of already stored "
-                     f"CSDs ({id_offset}).")
+                     f"CSDs or sensor scans ({id_offset}).")
              # resize datasets to fit new data
              ds.resize(ds.shape[0] + num_ids, axis=0)
              # process metadata
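Taken together, these hunks thread the new `csds`/`sensor_scans` either-or contract through every consistency check. A minimal sketch of the failure mode introduced by the validation in the first hunk (the file name is a placeholder):

```python
import numpy as np
from simcats_datasets.generation import create_dataset

scan = np.zeros((10, 10), dtype=np.float32)

# Passing both kinds of data now fails fast instead of writing an
# inconsistent file.
try:
    create_dataset("demo.h5", csds=[scan], sensor_scans=[scan])
except ValueError as err:
    print(err)  # "A dataset can contain either CSDs or sensor scans but never both! ..."
```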