simcats-datasets 2.5.0__tar.gz → 2.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. simcats_datasets-2.6.0/PKG-INFO +163 -0
  2. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/README.md +1 -1
  3. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/pyproject.toml +5 -5
  4. simcats_datasets-2.6.0/simcats_datasets/__init__.py +2 -0
  5. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/generation/_create_dataset.py +50 -0
  6. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/generation/_create_simulated_dataset.py +168 -69
  7. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/loading/_load_dataset.py +24 -0
  8. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/loading/load_ground_truth.py +213 -2
  9. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/loading/pytorch.py +9 -2
  10. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/clip_line_to_rectangle.py +15 -4
  11. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/convert_lines.py +34 -0
  12. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/data_preprocessing.py +112 -1
  13. simcats_datasets-2.6.0/simcats_datasets/support_functions/get_coulomb_oscillation_area_boundaries.py +471 -0
  14. simcats_datasets-2.6.0/simcats_datasets/support_functions/metadata_utils.py +62 -0
  15. simcats_datasets-2.6.0/simcats_datasets.egg-info/PKG-INFO +163 -0
  16. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets.egg-info/SOURCES.txt +2 -0
  17. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets.egg-info/requires.txt +1 -1
  18. simcats_datasets-2.5.0/PKG-INFO +0 -837
  19. simcats_datasets-2.5.0/simcats_datasets/__init__.py +0 -2
  20. simcats_datasets-2.5.0/simcats_datasets.egg-info/PKG-INFO +0 -837
  21. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/LICENSE +0 -0
  22. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/setup.cfg +0 -0
  23. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/setup.py +0 -0
  24. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/generation/__init__.py +0 -0
  25. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/loading/__init__.py +0 -0
  26. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/__init__.py +0 -0
  27. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/_json_encoders.py +0 -0
  28. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/get_lead_transition_labels.py +0 -0
  29. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets/support_functions/pytorch_format_output.py +0 -0
  30. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets.egg-info/dependency_links.txt +0 -0
  31. {simcats_datasets-2.5.0 → simcats_datasets-2.6.0}/simcats_datasets.egg-info/top_level.txt +0 -0
@@ -0,0 +1,163 @@
1
+ Metadata-Version: 2.4
2
+ Name: simcats-datasets
3
+ Version: 2.6.0
4
+ Summary: SimCATS-Datasets is a Python package that simplifies the creation and loading of SimCATS datasets.
5
+ Author-email: Fabian Hader <f.hader@fz-juelich.de>, Fabian Fuchs <f.fuchs@fz-juelich.de>, Karin Havemann <k.havemann@fz-juelich.de>, Sarah Fleitmann <s.fleitmann@fz-juelich.de>, Jan Vogelbruch <j.vogelbruch@fz-juelich.de>
6
+ License-Expression: GPL-3.0-or-later
7
+ Project-URL: homepage, https://github.com/f-hader/SimCATS-Datasets
8
+ Project-URL: documentation, https://simcats-datasets.readthedocs.io
9
+ Project-URL: source, https://github.com/f-hader/SimCATS-Datasets
10
+ Project-URL: tracker, https://github.com/f-hader/SimCATS-Datasets/issues
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Programming Language :: Python
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.7
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Topic :: Scientific/Engineering
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.7
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: bezier
26
+ Requires-Dist: bm3d
27
+ Requires-Dist: h5py
28
+ Requires-Dist: hdf5storage
29
+ Requires-Dist: numpy
30
+ Requires-Dist: opencv-python
31
+ Requires-Dist: pandas
32
+ Requires-Dist: parallelbar
33
+ Requires-Dist: parse
34
+ Requires-Dist: scikit-image
35
+ Requires-Dist: simcats>=2.0.0
36
+ Requires-Dist: torch
37
+ Requires-Dist: tqdm
38
+ Requires-Dist: xarray
39
+ Dynamic: license-file
40
+
41
+ <h1 align="center">
42
+ <img src="https://raw.githubusercontent.com/f-hader/SimCATS-Datasets/main/SimCATS-Datasets_symbol.svg" alt="SimCATS logo">
43
+ <br>
44
+ </h1>
45
+
46
+ <div align="center">
47
+ <a href="https://github.com/f-hader/SimCATS-Datasets/blob/main/LICENSE">
48
+ <img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPLv3"/>
49
+ </a>
50
+ <a href="https://pypi.org/project/simcats-datasets/">
51
+ <img src="https://img.shields.io/pypi/v/simcats-datasets.svg" alt="PyPi Latest Release"/>
52
+ </a>
53
+ <a href="https://simcats-datasets.readthedocs.io/en/latest/">
54
+ <img src="https://img.shields.io/readthedocs/simcats-datasets" alt="Read the Docs"/>
55
+ </a>
56
+ <a href="https://doi.org/10.1109/TQE.2024.3445967">
57
+ <img src="https://img.shields.io/badge/DOI (SimCATS Paper)-10.1109/TQE.2024.3445967-007ec6.svg" alt="DOI Paper"/>
58
+ </a>
59
+ <a href="https://doi.org/10.5281/zenodo.13862231">
60
+ <img src="https://img.shields.io/badge/DOI (Code)-10.5281/zenodo.13862231-007ec6.svg" alt="DOI Code"/>
61
+ </a>
62
+ </div>
63
+
64
+ # SimCATS-Datasets
65
+
66
+ `SimCATS-Datasets` is a Python package that simplifies the creation and loading of `SimCATS` datasets. Please have a look at
67
+ [this repository](https://github.com/f-hader/SimCATS) regarding `SimCATS` itself.
68
+
69
+ ## Installation
70
+
71
+ The framework supports Python versions 3.7 - 3.11 and installs via pip:
72
+ ```
73
+ pip install simcats-datasets
74
+ ```
75
+
76
+ Alternatively, the `SimCATS-Datasets` package can be installed by cloning the GitHub repository, navigating to the
77
+ folder containing the `setup.py` file, and executing
78
+ ```
79
+ pip install .
80
+ ```
81
+
82
+ For installation in development/editable mode, use the option `-e`.
83
+
84
+ <!-- start sec:documentation -->
85
+ ## Documentation
86
+
87
+ The official documentation is hosted on [Read the Docs](https://simcats-datasets.readthedocs.io) but can also be built
88
+ locally. To do this, first install the packages `sphinx`, `sphinx-rtd-theme`, `sphinx-autoapi`, `myst-nb`, and
89
+ `jupytext` with
90
+
91
+ ```
92
+ pip install sphinx sphinx-rtd-theme sphinx-autoapi myst-nb jupytext
93
+ ```
94
+
95
+ and then, in the `docs` folder, execute the following command:
96
+
97
+ ```
98
+ .\make html
99
+ ```
100
+
101
+ To view the generated HTML documentation, open the file `docs\build\html\index.html`.
102
+ <!-- end sec:documentation -->
103
+
104
+
105
+ ## Loading Datasets
106
+
107
+ Datasets created with `SimCATS-Datasets` are stored in HDF5 files. These datasets can be loaded using the function
108
+ `load_dataset` from `simcats_datasets.loading`.
109
+
110
+ The return value of the function is a named tuple. The fields can be accessed by their name or index. As with normal
111
+ tuples, it is also possible to unpack the returned fields directly into separate variables. The available fields
112
+ depend on which data was specified to be loaded. Please look at the docstring for further information.
113
+
114
+ Additionally, `SimCATS-Datasets` offers a pytorch dataset (see `torch.utils.data.Dataset`) implementation called
115
+ `SimcatsDataset`. It allows the direct use of `SimCATS` datasets for machine learning purposes with Torch and can be
116
+ imported from `simcats_datasets.loading.pytorch`.
117
+
118
+ ## Creating Datasets
119
+
120
+ To create a simulated dataset, import `create_simulated_dataset` from `simcats_datasets.generation`. This function
121
+ allows the creation of simulated CSDs with ground truth very easily. It is also possible to add further CSDs to already
122
+ existing datasets. The function will detect the existing dataset automatically. For the function's usage, please have a
123
+ look at its docstring.
124
+
125
+ | :warning: WARNING |
126
+ |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
127
+ | The functionalities for creating and extending simulated datasets using SimCATS expect that the SimCATS simulation uses the IdealCSDInterface implementation called IdealCSDGeometric. Other implementations might cause problems because the expected information for creating labeled lines etc. might be unavailable. |
128
+
129
+
130
+ As an alternative to using `create_simulated_dataset` and directly simulating a dataset with `SimCATS`, it is also possible
131
+ to create a `SimCATS-Dataset` compatible dataset with existing data (for example, experimentally measured data or data
132
+ simulated with other frameworks). This can be done using `create_dataset` from `simcats_datasets.generation`.
133
+
134
+ ## Citations
135
+
136
+ ```bibtex
137
+ @article{hader2024simcats,
138
+ author={Hader, Fabian and Fleitmann, Sarah and Vogelbruch, Jan and Geck, Lotte and Waasen, Stefan van},
139
+ journal={IEEE Transactions on Quantum Engineering},
140
+ title={Simulation of Charge Stability Diagrams for Automated Tuning Solutions (SimCATS)},
141
+ year={2024},
142
+ volume={5},
143
+ pages={1-14},
144
+ doi={10.1109/TQE.2024.3445967}
145
+ }
146
+ ```
147
+
148
+ ## License, CLA, and Copyright
149
+
150
+ [![License: GPLv3][gplv3-shield]][gplv3]
151
+
152
+ This work is licensed under a
153
+ [GNU General Public License 3][gplv3].
154
+
155
+ [![GPLv3][gplv3-image]][gplv3]
156
+
157
+ [gplv3]: https://www.gnu.org/licenses/gpl-3.0.html
158
+ [gplv3-image]: https://www.gnu.org/graphics/gplv3-127x51.png
159
+ [gplv3-shield]: https://img.shields.io/badge/License-GPLv3-blue.svg
160
+
161
+ Contributions must follow the Contributor License Agreement. For more information, see the [CONTRIBUTING.md](https://github.com/f-hader/SimCATS-Datasets/blob/main/CONTRIBUTING.md) file at the top of the GitHub repository.
162
+
163
+ Copyright © 2026 Peter Grünberg Institute - Integrated Computing Architectures (ICA / PGI-4), Forschungszentrum Jülich GmbH
@@ -120,4 +120,4 @@ This work is licensed under a
120
120
 
121
121
  Contributions must follow the Contributor License Agreement. For more information, see the [CONTRIBUTING.md](https://github.com/f-hader/SimCATS-Datasets/blob/main/CONTRIBUTING.md) file at the top of the GitHub repository.
122
122
 
123
- Copyright © 2024 Forschungszentrum Jülich GmbH - Central Institute of Engineering, Electronics and Analytics (ZEA) - Electronic Systems (ZEA-2)
123
+ Copyright © 2026 Peter Grünberg Institute - Integrated Computing Architectures (ICA / PGI-4), Forschungszentrum Jülich GmbH
@@ -1,11 +1,12 @@
1
1
  [build-system]
2
- requires = ["setuptools>=61.0"]
2
+ requires = ["setuptools>=77.0"]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "simcats-datasets"
7
- version = "2.5.0" # also change in docs/source/conf.py and __init__
8
- license = { file="LICENSE" }
7
+ version = "2.6.0" # also change in docs/source/conf.py and __init__
8
+ license = "GPL-3.0-or-later"
9
+ license-files = ["LICENSE"]
9
10
  authors = [
10
11
  { name="Fabian Hader", email="f.hader@fz-juelich.de" },
11
12
  { name="Fabian Fuchs", email="f.fuchs@fz-juelich.de" },
@@ -19,7 +20,6 @@ description = """\
19
20
  readme = "README.md"
20
21
  requires-python = ">=3.7"
21
22
  classifiers = [
22
- 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)',
23
23
  'Development Status :: 5 - Production/Stable',
24
24
  'Intended Audience :: Science/Research',
25
25
  'Programming Language :: Python',
@@ -43,7 +43,7 @@ dependencies = [
43
43
  "parallelbar",
44
44
  "parse",
45
45
  "scikit-image",
46
- "simcats>=1.2.0",
46
+ "simcats>=2.0.0",
47
47
  "torch",
48
48
  "tqdm",
49
49
  "xarray"
@@ -0,0 +1,2 @@
1
+ __all__ = []
2
+ __version__ = "2.6.0"
@@ -21,6 +21,8 @@ def create_dataset(dataset_path: str,
21
21
  occupations: Optional[List[np.ndarray]] = None,
22
22
  tct_masks: Optional[List[np.ndarray]] = None,
23
23
  ct_by_dot_masks: Optional[List[np.ndarray]] = None,
24
+ sensor_regime_masks: Optional[List[np.ndarray]] = None,
25
+ sensor_peak_center_masks: Optional[List[np.ndarray]] = None,
24
26
  line_coordinates: Optional[List[np.ndarray]] = None,
25
27
  line_labels: Optional[List[dict]] = None,
26
28
  metadata: Optional[List[dict]] = None,
@@ -32,6 +34,8 @@ def create_dataset(dataset_path: str,
32
34
  dtype_occ: np.dtype = np.float32,
33
35
  dtype_tct: np.dtype = np.uint8,
34
36
  dtype_ct_by_dot: np.dtype = np.uint8,
37
+ dtype_sensor_regime_masks: np.dtype = np.uint8,
38
+ dtype_sensor_peak_center_masks: np.dtype = np.uint8,
35
39
  dtype_line_coordinates: np.dtype = np.float32) -> None:
36
40
  """Function for creating simcats_datasets v2 format datasets from given data.
37
41
 
@@ -44,6 +48,8 @@ def create_dataset(dataset_path: str,
44
48
  occupations: List of occupations to use for creating the dataset. Defaults to None.
45
49
  tct_masks: List of TCT masks to use for creating the dataset. Defaults to None.
46
50
  ct_by_dot_masks: List of CT by dot masks to use for creating the dataset. Defaults to None.
51
+ sensor_regime_masks: List of sensor regime masks to use for creating the dataset. Defaults to None.
52
+ sensor_peak_center_masks: List of sensor peak center masks to use for creating the dataset. Defaults to None.
47
53
  line_coordinates: List of line coordinates to use for creating the dataset. Defaults to None.
48
54
  line_labels: List of line labels to use for creating the dataset. Defaults to None.
49
55
  metadata: List of metadata to use for creating the dataset. Defaults to None.
@@ -62,6 +68,9 @@ def create_dataset(dataset_path: str,
62
68
  dtype_occ: Specifies the dtype to be used for saving Occupations. Default is np.float32.
63
69
  dtype_tct: Specifies the dtype to be used for saving TCTs. Default is np.uint8.
64
70
  dtype_ct_by_dot: Specifies the dtype to be used for saving CT by dot masks. Default is np.uint8.
71
+ dtype_sensor_regime_masks: Specifies the dtype to be used for saving sensor regime masks. Default is np.uint8.
72
+ dtype_sensor_peak_center_masks: Specifies the dtype to be used for saving sensor peak center masks. Default is
73
+ np.uint8.
65
74
  dtype_line_coordinates: Specifies the dtype to be used for saving line coordinates. Default is np.float32.
66
75
  """
67
76
  # Create path where the dataset will be saved (if folder doesn't exist already)
@@ -170,6 +179,47 @@ def create_dataset(dataset_path: str,
170
179
  # resize datasets to fit new data
171
180
  ds.resize(ds.shape[0] + num_ids, axis=0)
172
181
  ds[id_offset:] = np.array(ct_by_dot_masks).astype(dtype_tct)
182
+ if sensor_regime_masks is not None:
183
+ if len(sensor_regime_masks) != num_ids:
184
+ raise ValueError(
185
+ f"Number of new sensor regime mask arrays ({len(sensor_regime_masks)}) does not match the number "
186
+ f"of new CSDs or sensor scans ({num_ids}).")
187
+ # process sensor regime masks
188
+ # save an example sensor regime mask to get shape and dtype
189
+ temp_sensor_regime_mask = sensor_regime_masks[0].copy()
190
+ # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
191
+ # load one image at a time during training)
192
+ ds = hdf5_file.require_dataset(name='sensor_regime_masks', shape=(0, *temp_sensor_regime_mask.shape),
193
+ dtype=dtype_sensor_regime_masks,
194
+ maxshape=(None, *temp_sensor_regime_mask.shape))
195
+ if ds.shape[0] != id_offset:
196
+ raise ValueError(
197
+ f"Number of already stored sensor regime mask arrays ({ds.shape[0]}) does not match the number of "
198
+ f"already stored CSDs or sensor scans ({id_offset}).")
199
+ # resize datasets to fit new data
200
+ ds.resize(ds.shape[0] + num_ids, axis=0)
201
+ ds[id_offset:] = np.array(sensor_regime_masks).astype(dtype_sensor_regime_masks)
202
+ if sensor_peak_center_masks is not None:
203
+ if len(sensor_peak_center_masks) != num_ids:
204
+ raise ValueError(
205
+ f"Number of new sensor peak center mask arrays ({len(sensor_peak_center_masks)}) does not match "
206
+ f"the number of new CSDs or sensor scans ({num_ids}).")
207
+ # process sensor peak center masks
208
+ # save an example sensor peak center mask to get shape and dtype
209
+ temp_sensor_peak_center_mask = sensor_peak_center_masks[0].copy()
210
+ # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
211
+ # load one image at a time during training)
212
+ ds = hdf5_file.require_dataset(name='sensor_peak_center_masks',
213
+ shape=(0, *temp_sensor_peak_center_mask.shape),
214
+ dtype=dtype_sensor_peak_center_masks,
215
+ maxshape=(None, *temp_sensor_peak_center_mask.shape))
216
+ if ds.shape[0] != id_offset:
217
+ raise ValueError(
218
+ f"Number of already stored sensor peak center mask arrays ({ds.shape[0]}) does not match the "
219
+ f"number of already stored CSDs or sensor scans ({id_offset}).")
220
+ # resize datasets to fit new data
221
+ ds.resize(ds.shape[0] + num_ids, axis=0)
222
+ ds[id_offset:] = np.array(sensor_peak_center_masks).astype(dtype_sensor_peak_center_masks)
173
223
  if line_coordinates is not None:
174
224
  if len(line_coordinates) != num_ids:
175
225
  raise ValueError(
@@ -16,6 +16,14 @@ import numpy as np
16
16
 
17
17
  # parallel
18
18
  from parallelbar import progress_imap
19
+ from tqdm import tqdm
20
+
21
+ from simcats_datasets.loading import load_dataset
22
+ from simcats_datasets.loading.load_ground_truth import load_ct_by_dot_masks
23
+ # label creation based on line intersection
24
+ from simcats_datasets.support_functions.get_lead_transition_labels import get_lead_transition_labels
25
+ from simcats_datasets.support_functions.get_coulomb_oscillation_area_boundaries import get_coulomb_oscillation_area_boundaries
26
+ from simcats_datasets.support_functions._json_encoders import NumpyEncoder
19
27
 
20
28
  # for SimCATS simulation
21
29
  from simcats import Simulation, default_configs
@@ -25,27 +33,22 @@ from simcats.support_functions import (
25
33
  NormalSamplingRange,
26
34
  UniformSamplingRange, ExponentialSamplingRange,
27
35
  )
28
- from tqdm import tqdm
29
-
30
- from simcats_datasets.loading import load_dataset
31
- from simcats_datasets.loading.load_ground_truth import load_ct_by_dot_masks
32
- # label creation based on line intersection
33
- from simcats_datasets.support_functions.get_lead_transition_labels import get_lead_transition_labels
34
- from simcats_datasets.support_functions._json_encoders import NumpyEncoder
35
36
 
36
37
  __all__ = []
37
38
 
38
39
 
39
40
  def _simulate(args: Tuple) -> Tuple:
40
- """Method to simulate a csd with the given args. Required for parallel simulation in create_cimulated_dataset.
41
+ """Method to simulate a CSD or sensor scan with the given args. Required for parallel simulation in create_simulated_dataset.
41
42
 
42
43
  Args:
43
- args: Tuple of sample_range_g1, sample_range_g2, volt_range, simcats_config, resolution.
44
+ args: Tuple of sample_range_g1, sample_range_g2, sample_range_sensor_g1, sample_range_sensor_g2, volt_range,
45
+ simcats_config, resolution, sensor_scan_dataset, reset_sensor_offset_mu_sens_in_csds.
44
46
 
45
47
  Returns:
46
- Tuple of csd, occ, lead_trans, metadata, line_points, labels.
48
+ Tuple of measurement, occupation_mask, lead_transition_mask, metadata, line_points, labels.
47
49
  """
48
- sample_range_g1, sample_range_g2, volt_range, simcats_config, resolution = args
50
+ (sample_range_g1, sample_range_g2, sample_range_sensor_g1, sample_range_sensor_g2, volt_range, simcats_config,
51
+ resolution, sensor_scan_dataset, reset_sensor_offset_mu_sens_in_csds) = args
49
52
 
50
53
  # random number generator used for sampling volt ranges.
51
54
  # !Must be generated here! Else same for every process!
@@ -75,23 +78,53 @@ def _simulate(args: Tuple) -> Tuple:
75
78
  sim = Simulation(**simcats_config)
76
79
 
77
80
  # sample voltage ranges
78
- g1_start = rng.uniform(low=sample_range_g1[0], high=sample_range_g1[1])
79
- g2_start = rng.uniform(low=sample_range_g2[0], high=sample_range_g2[1])
80
- g1_range = np.array([g1_start, g1_start + volt_range[0]])
81
- g2_range = np.array([g2_start, g2_start + volt_range[1]])
81
+ g1_start = rng.uniform(low=sample_range_g1[0], high=sample_range_g1[1]) if sample_range_g1 is not None else None
82
+ g2_start = rng.uniform(low=sample_range_g2[0], high=sample_range_g2[1]) if sample_range_g2 is not None else None
83
+ sensor_g1_start = rng.uniform(low=sample_range_sensor_g1[0], high=sample_range_sensor_g1[1]) if sample_range_sensor_g1 is not None else None
84
+ sensor_g2_start = rng.uniform(low=sample_range_sensor_g2[0], high=sample_range_sensor_g2[1]) if sample_range_sensor_g2 is not None else None
85
+
82
86
  # perform simulation
83
- csd, occ, lead_trans, metadata = sim.measure(
84
- sweep_range_g1=g1_range, sweep_range_g2=g2_range, resolution=resolution
85
- )
86
- # calculate lead_transition labels
87
- ideal_csd_conf = metadata["ideal_csd_config"]
88
- line_points, labels = get_lead_transition_labels(
89
- sweep_range_g1=g1_range,
90
- sweep_range_g2=g2_range,
91
- ideal_csd_config=ideal_csd_conf,
92
- lead_transition_mask=lead_trans,
93
- )
94
- return csd, occ, lead_trans, metadata, line_points, labels
87
+ if not sensor_scan_dataset:
88
+ g1_range = np.array([g1_start, g1_start + volt_range[0]])
89
+ g2_range = np.array([g2_start, g2_start + volt_range[1]])
90
+ if reset_sensor_offset_mu_sens_in_csds:
91
+ # calculate potential to reset offset_mu_sens
92
+ occupations, _ = sim.ideal_csd_config.get_csd_data(volt_limits_g1=g1_range,
93
+ volt_limits_g2=g2_range,
94
+ resolution=2)
95
+ potentials = simcats_config["sensor"].sensor_potential(occupations=occupations,
96
+ volt_limits_g1=g1_range,
97
+ volt_limits_g2=g2_range)
98
+ # the new offset is calculated as follows: offset - (potentials[0] - offset)
99
+ sim.sensor.offset_mu_sens = 2 * simcats_config["sensor"].offset_mu_sens - potentials[0]
100
+ measurement, occ, lead_trans, metadata = sim.measure(
101
+ sweep_range_g1=g1_range,
102
+ sweep_range_g2=g2_range,
103
+ volt_sensor_g1=sensor_g1_start,
104
+ volt_sensor_g2=sensor_g2_start,
105
+ resolution=resolution
106
+ )
107
+ # calculate lead_transition labels
108
+ ideal_csd_conf = metadata["ideal_csd_config"]
109
+ line_points, labels = get_lead_transition_labels(
110
+ sweep_range_g1=g1_range,
111
+ sweep_range_g2=g2_range,
112
+ ideal_csd_config=ideal_csd_conf,
113
+ lead_transition_mask=lead_trans,
114
+ )
115
+ else:
116
+ sensor_g1_range = np.array([sensor_g1_start, sensor_g1_start + volt_range[0]])
117
+ sensor_g2_range = np.array([sensor_g2_start, sensor_g2_start + volt_range[1]])
118
+ measurement, occ, lead_trans, metadata = sim.measure_sensor_scan(
119
+ sweep_range_sensor_g1=sensor_g1_range,
120
+ sweep_range_sensor_g2=sensor_g2_range,
121
+ volt_g1=g1_start,
122
+ volt_g2=g2_start,
123
+ resolution=resolution
124
+ )
125
+ line_points, labels = get_coulomb_oscillation_area_boundaries(metadata)
126
+
127
+ return measurement, occ, lead_trans, metadata, line_points, labels
95
128
 
96
129
 
97
130
  def create_simulated_dataset(
@@ -107,28 +140,36 @@ def create_simulated_dataset(
107
140
  max_len_line_labels_chunk: int = 2000,
108
141
  max_len_metadata_chunk: int = 8000,
109
142
  dtype_csd: np.dtype = np.float32,
143
+ dtype_sensor_scan: np.dtype = np.float32,
110
144
  dtype_occ: np.dtype = np.float32,
111
145
  dtype_tct: np.dtype = np.uint8,
146
+ dtype_sensor_regime_masks: np.dtype = np.uint8,
147
+ dtype_sensor_peak_center_masks: np.dtype = np.uint8,
112
148
  dtype_line_coordinates: np.dtype = np.float32,
149
+ sensor_scan_dataset: bool = False,
150
+ reset_sensor_offset_mu_sens_in_csds: bool = False,
113
151
  ) -> None:
114
152
  """Function for generating simulated datasets using SimCATS for simulations.
115
153
 
116
- **Warning**: This function expects that the simulation config uses IdealCSDGeometric from SimCATS. Other
117
- implementations are not guaranteed to work.
154
+ Datasets can either contain CSDs or sensor scans.
155
+
156
+ **Warning**: This function expects that the simulation config uses IdealCSDGeometric (from SimCATS) for CSD datasets
157
+ and SensorScanSensorGeneric (from SimCATS) for sensor scan datasets. Other implementations are not guaranteed to
158
+ work.
118
159
 
119
160
  Args:
120
161
  dataset_path: The path where the dataset will be stored. Can also be an already existing dataset, to which new
121
162
  data is added.
122
- simcats_config: Configuration for simcats simulation class. Default is the GaAs_v1 config provided by simcats.
123
- n_runs: Number of CSDs to be generated. Default is 10000.
124
- resolution: Pixel resolution for both axis of the CSDs, first number of columns (x), then number of rows (y).
125
- Default is np.array([100, 100]). \n
163
+ simcats_config: Configuration for SimCATS simulation class. Default is the GaAs_v1 config provided by SimCATS.
164
+ n_runs: Number of CSDs or sensor scans to be generated. Default is 10000.
165
+ resolution: Pixel resolution for both axes of the measurements, first number of columns (x), then number of rows
166
+ (y). Default is np.array([100, 100]). \n
126
167
  Example: \n
127
168
  [res_g1, res_g2]
128
- volt_range: Volt range for both axis of the CSDs. Individual CSDs with the specified size are randomly sampled
129
- in the voltage space. Default is np.array([0.03, 0.03]) (usually the scans from RWTH GaAs offler sample are
130
- 30mV x 30mV).
131
- tags: Additional tags for the data to be simulated, which will be added to the dataset DataFrame. Default is
169
+ volt_range: Volt range for both axes of the measurements. Individual measurements with the specified size are
170
+ randomly sampled in the voltage space (defined by the volt_limits in the SimCATS config). Default is
171
+ np.array([0.03, 0.03]) (usually the scans from RWTH GaAs offler sample are 30mV x 30mV).
172
+ tags: Additional tags for the data to be simulated, which will be added to the dataset metadata. Default is
132
173
  None. \n
133
174
  Example: \n
134
175
  {"tags": "shifted sensor, no noise", "sample": "GaAs"}.
@@ -139,9 +180,21 @@ def create_simulated_dataset(
139
180
  max_len_line_labels_chunk: Maximum number of chars for the line label dict. Default is 2000.
140
181
  max_len_metadata_chunk: Maximum number of chars for the metadata dict. Default is 8000.
141
182
  dtype_csd: Specifies the dtype to be used for saving CSDs. Default is np.float32.
183
+ dtype_sensor_scan: Specifies the dtype to be used for saving sensor scans. Default is np.float32.
142
184
  dtype_occ: Specifies the dtype to be used for saving Occupations. Default is np.float32.
143
185
  dtype_tct: Specifies the dtype to be used for saving TCTs. Default is np.uint8.
186
+ dtype_sensor_regime_masks: Specifies the dtype to be used for saving sensor regime masks. Default is np.uint8.
187
+ dtype_sensor_peak_center_masks: Specifies the dtype to be used for saving sensor peak center masks. Default is
188
+ np.uint8.
144
189
  dtype_line_coordinates: Specifies the dtype to be used for saving line coordinates. Default is np.float32.
190
+ sensor_scan_dataset: Determines whether to generate a sensor scan dataset (contains sensor scans instead of
191
+ CSDs). Default is False.
192
+ reset_sensor_offset_mu_sens_in_csds: Specifies whether to reset the sensor offset_mu_sens parameter before CSD
193
+ measurements. If this is activated, the offset of the sensor potential is reset so that the first pixel of
194
+ the CSD is exactly at the previously defined offset_mu_sens. Thus, this effectively resets the sensor to
195
+ start at the position defined by offset_mu_sens before starting to measure. It is intended to simulate that
196
+ the sensor is retuned to the defined position before each CSD. It has no effect for sensor scan datasets.
197
+ Default is False.
145
198
  """
146
199
  # set tags to an empty dict if none were supplied
147
200
  if tags is None:
@@ -150,36 +203,81 @@ def create_simulated_dataset(
150
203
  # Create path where the dataset will be saved (if folder doesn't exist already)
151
204
  Path(Path(dataset_path).parent).mkdir(parents=True, exist_ok=True)
152
205
 
206
+ # retrieve the allowed sampling ranges from the config and copy them (else we would change the config itself)
207
+ sample_range_g1 = simcats_config.get("volt_limits_g1", None)
208
+ sample_range_g1 = sample_range_g1.astype(np.float32) if sample_range_g1 is not None else None
209
+ sample_range_g2 = simcats_config.get("volt_limits_g2", None)
210
+ sample_range_g2 = sample_range_g2.astype(np.float32) if sample_range_g2 is not None else None
211
+ sample_range_sensor_g1 = simcats_config.get("volt_limits_sensor_g1", None)
212
+ sample_range_sensor_g1 = sample_range_sensor_g1.astype(np.float32) if sample_range_sensor_g1 is not None else None
213
+ sample_range_sensor_g2 = simcats_config.get("volt_limits_sensor_g2", None)
214
+ sample_range_sensor_g2 = sample_range_sensor_g2.astype(np.float32) if sample_range_sensor_g2 is not None else None
153
215
  # arange volt limits so that random sampling gives us a starting point that is at least the defined volt_range below
154
216
  # the maximum
155
- sample_range_g1 = simcats_config["volt_limits_g1"].copy()
156
- sample_range_g1[-1] -= volt_range[0]
157
- sample_range_g2 = simcats_config["volt_limits_g2"].copy()
158
- sample_range_g2[-1] -= volt_range[1]
217
+ if not sensor_scan_dataset:
218
+ measurement_type = "csds"
219
+ sample_range_g1[-1] -= volt_range[0]
220
+ sample_range_g2[-1] -= volt_range[1]
221
+ else:
222
+ measurement_type = "sensor_scans"
223
+ sample_range_sensor_g1[-1] -= volt_range[0]
224
+ sample_range_sensor_g2[-1] -= volt_range[1]
159
225
 
160
226
  with h5py.File(dataset_path, "a") as hdf5_file:
161
227
  # load datasets or create them if not already there
162
- csds = hdf5_file.require_dataset(
163
- name="csds",
164
- shape=(0, resolution[1], resolution[0]),
165
- chunks=(1, resolution[1], resolution[0]),
166
- dtype=dtype_csd,
167
- maxshape=(None, resolution[1], resolution[0]),
168
- )
169
- occupations = hdf5_file.require_dataset(
170
- name="occupations",
171
- shape=(0, resolution[1], resolution[0], 2),
172
- chunks=(1, resolution[1], resolution[0], 2),
173
- dtype=dtype_occ,
174
- maxshape=(None, resolution[1], resolution[0], 2),
175
- )
176
- tct_masks = hdf5_file.require_dataset(
177
- name="tct_masks",
178
- shape=(0, resolution[1], resolution[0]),
179
- chunks=(1, resolution[1], resolution[0]),
180
- dtype=dtype_tct,
181
- maxshape=(None, resolution[1], resolution[0]),
182
- )
228
+ if isinstance(resolution, int):
229
+ measurements = hdf5_file.require_dataset(
230
+ name=measurement_type,
231
+ shape=(0, resolution),
232
+ chunks=(1, resolution),
233
+ dtype=dtype_csd if not sensor_scan_dataset else dtype_sensor_scan,
234
+ maxshape=(None, resolution),
235
+ )
236
+ occupations = hdf5_file.require_dataset(
237
+ name="occupations" if not sensor_scan_dataset else "sensor_regime_masks",
238
+ shape=(0, resolution, 2) if not sensor_scan_dataset else (0, resolution),
239
+ chunks=(1, resolution, 2) if not sensor_scan_dataset else (1, resolution),
240
+ dtype=dtype_occ if not sensor_scan_dataset else dtype_sensor_regime_masks,
241
+ maxshape=(None, resolution, 2) if not sensor_scan_dataset else (None, resolution),
242
+ )
243
+ tct_masks = hdf5_file.require_dataset(
244
+ name="tct_masks" if not sensor_scan_dataset else "sensor_peak_center_masks",
245
+ shape=(0, resolution),
246
+ chunks=(1, resolution),
247
+ dtype=dtype_tct if not sensor_scan_dataset else dtype_sensor_peak_center_masks,
248
+ maxshape=(None, resolution),
249
+ )
250
+
251
+ elif len(resolution) == 2:
252
+ measurements = hdf5_file.require_dataset(
253
+ name=measurement_type,
254
+ shape=(0, resolution[1], resolution[0]),
255
+ chunks=(1, resolution[1], resolution[0]),
256
+ dtype=dtype_csd if not sensor_scan_dataset else dtype_sensor_scan,
257
+ maxshape=(None, resolution[1], resolution[0]),
258
+ )
259
+ occupations = hdf5_file.require_dataset(
260
+ name="occupations" if not sensor_scan_dataset else "sensor_regime_masks",
261
+ shape=(0, resolution[1], resolution[0], 2) if not sensor_scan_dataset else (
262
+ 0, resolution[1], resolution[0]),
263
+ chunks=(1, resolution[1], resolution[0], 2) if not sensor_scan_dataset else (
264
+ 1, resolution[1], resolution[0]),
265
+ dtype=dtype_occ if not sensor_scan_dataset else dtype_sensor_regime_masks,
266
+ maxshape=(None, resolution[1], resolution[0], 2) if not sensor_scan_dataset else (
267
+ None, resolution[1], resolution[0]),
268
+ )
269
+ tct_masks = hdf5_file.require_dataset(
270
+ name="tct_masks" if not sensor_scan_dataset else "sensor_peak_center_masks",
271
+ shape=(0, resolution[1], resolution[0]),
272
+ chunks=(1, resolution[1], resolution[0]),
273
+ dtype=dtype_tct if not sensor_scan_dataset else dtype_sensor_peak_center_masks,
274
+ maxshape=(None, resolution[1], resolution[0]),
275
+ )
276
+ else:
277
+ raise ValueError(
278
+ "An invalid resolution was given. The resolution should either be an integer or a one dimensional numpy"
279
+ " array with two elements.")
280
+
183
281
  line_coords = hdf5_file.require_dataset(
184
282
  name="line_coordinates",
185
283
  shape=(0, max_len_line_coordinates_chunk),
@@ -202,10 +300,10 @@ def create_simulated_dataset(
202
300
  maxshape=(None, max_len_metadata_chunk),
203
301
  )
204
302
  # determine index offset if there is already data in the dataset
205
- id_offset = csds.shape[0]
303
+ id_offset = measurements.shape[0]
206
304
 
207
305
  # resize datasets to fit new data
208
- csds.resize(csds.shape[0] + n_runs, axis=0)
306
+ measurements.resize(measurements.shape[0] + n_runs, axis=0)
209
307
  occupations.resize(occupations.shape[0] + n_runs, axis=0)
210
308
  tct_masks.resize(tct_masks.shape[0] + n_runs, axis=0)
211
309
  line_coords.resize(line_coords.shape[0] + n_runs, axis=0)
@@ -215,10 +313,11 @@ def create_simulated_dataset(
215
313
  # simulate and save data
216
314
  indices = range(id_offset, n_runs + id_offset)
217
315
  arguments = itertools.repeat(
218
- (sample_range_g1, sample_range_g2, volt_range, simcats_config, resolution),
316
+ (sample_range_g1, sample_range_g2, sample_range_sensor_g1, sample_range_sensor_g2, volt_range,
317
+ simcats_config, resolution, sensor_scan_dataset, reset_sensor_offset_mu_sens_in_csds),
219
318
  times=len(indices),
220
319
  )
221
- for index, (csd, occ, lead_trans, metadata, line_points, labels) in zip(
320
+ for index, (measurement, occ, lead_trans, metadata, line_points, labels) in zip(
222
321
  indices,
223
322
  progress_imap(
224
323
  func=_simulate,
@@ -230,9 +329,9 @@ def create_simulated_dataset(
230
329
  ),
231
330
  ):
232
331
  # save data
233
- csds[index] = csd.astype(dtype_csd)
234
- occupations[index] = occ.astype(dtype_occ)
235
- tct_masks[index] = lead_trans.astype(dtype_tct)
332
+ measurements[index] = measurement.astype(dtype_csd if not sensor_scan_dataset else dtype_sensor_scan)
333
+ occupations[index] = occ.astype(dtype_occ if not sensor_scan_dataset else dtype_sensor_regime_masks)
334
+ tct_masks[index] = lead_trans.astype(dtype_tct if not sensor_scan_dataset else dtype_sensor_peak_center_masks)
236
335
  line_coords[index] = np.pad(
237
336
  line_points.flatten(),
238
337
  ((0, max_len_line_coordinates_chunk - line_points.size)),