dclab-0.67.0-cp314-cp314t-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dclab might be problematic.
- dclab/__init__.py +41 -0
- dclab/_version.py +34 -0
- dclab/cached.py +97 -0
- dclab/cli/__init__.py +10 -0
- dclab/cli/common.py +237 -0
- dclab/cli/task_compress.py +126 -0
- dclab/cli/task_condense.py +223 -0
- dclab/cli/task_join.py +229 -0
- dclab/cli/task_repack.py +98 -0
- dclab/cli/task_split.py +154 -0
- dclab/cli/task_tdms2rtdc.py +186 -0
- dclab/cli/task_verify_dataset.py +75 -0
- dclab/definitions/__init__.py +79 -0
- dclab/definitions/feat_const.py +202 -0
- dclab/definitions/feat_logic.py +182 -0
- dclab/definitions/meta_const.py +252 -0
- dclab/definitions/meta_logic.py +111 -0
- dclab/definitions/meta_parse.py +94 -0
- dclab/downsampling.cpython-314t-darwin.so +0 -0
- dclab/downsampling.pyx +230 -0
- dclab/external/__init__.py +4 -0
- dclab/external/packaging/LICENSE +3 -0
- dclab/external/packaging/LICENSE.APACHE +177 -0
- dclab/external/packaging/LICENSE.BSD +23 -0
- dclab/external/packaging/__init__.py +6 -0
- dclab/external/packaging/_structures.py +61 -0
- dclab/external/packaging/version.py +505 -0
- dclab/external/skimage/LICENSE +28 -0
- dclab/external/skimage/__init__.py +2 -0
- dclab/external/skimage/_find_contours.py +216 -0
- dclab/external/skimage/_find_contours_cy.cpython-314t-darwin.so +0 -0
- dclab/external/skimage/_find_contours_cy.pyx +188 -0
- dclab/external/skimage/_pnpoly.cpython-314t-darwin.so +0 -0
- dclab/external/skimage/_pnpoly.pyx +99 -0
- dclab/external/skimage/_shared/__init__.py +1 -0
- dclab/external/skimage/_shared/geometry.cpython-314t-darwin.so +0 -0
- dclab/external/skimage/_shared/geometry.pxd +6 -0
- dclab/external/skimage/_shared/geometry.pyx +55 -0
- dclab/external/skimage/measure.py +7 -0
- dclab/external/skimage/pnpoly.py +53 -0
- dclab/external/statsmodels/LICENSE +35 -0
- dclab/external/statsmodels/__init__.py +6 -0
- dclab/external/statsmodels/nonparametric/__init__.py +1 -0
- dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
- dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
- dclab/external/statsmodels/nonparametric/kernels.py +36 -0
- dclab/features/__init__.py +9 -0
- dclab/features/bright.py +81 -0
- dclab/features/bright_bc.py +93 -0
- dclab/features/bright_perc.py +63 -0
- dclab/features/contour.py +161 -0
- dclab/features/emodulus/__init__.py +339 -0
- dclab/features/emodulus/load.py +252 -0
- dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
- dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
- dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
- dclab/features/emodulus/pxcorr.py +135 -0
- dclab/features/emodulus/scale_linear.py +247 -0
- dclab/features/emodulus/viscosity.py +260 -0
- dclab/features/fl_crosstalk.py +95 -0
- dclab/features/inert_ratio.py +377 -0
- dclab/features/volume.py +242 -0
- dclab/http_utils.py +322 -0
- dclab/isoelastics/__init__.py +468 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
- dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
- dclab/kde/__init__.py +1 -0
- dclab/kde/base.py +459 -0
- dclab/kde/contours.py +222 -0
- dclab/kde/methods.py +313 -0
- dclab/kde_contours.py +10 -0
- dclab/kde_methods.py +11 -0
- dclab/lme4/__init__.py +5 -0
- dclab/lme4/lme4_template.R +94 -0
- dclab/lme4/rsetup.py +204 -0
- dclab/lme4/wrapr.py +386 -0
- dclab/polygon_filter.py +398 -0
- dclab/rtdc_dataset/__init__.py +15 -0
- dclab/rtdc_dataset/check.py +902 -0
- dclab/rtdc_dataset/config.py +533 -0
- dclab/rtdc_dataset/copier.py +353 -0
- dclab/rtdc_dataset/core.py +896 -0
- dclab/rtdc_dataset/export.py +867 -0
- dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
- dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
- dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
- dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
- dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
- dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
- dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
- dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
- dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
- dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
- dclab/rtdc_dataset/feat_basin.py +762 -0
- dclab/rtdc_dataset/feat_temp.py +102 -0
- dclab/rtdc_dataset/filter.py +263 -0
- dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
- dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
- dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
- dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
- dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
- dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
- dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
- dclab/rtdc_dataset/fmt_dict.py +103 -0
- dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
- dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
- dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
- dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
- dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
- dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
- dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
- dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
- dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
- dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
- dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
- dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
- dclab/rtdc_dataset/fmt_http.py +102 -0
- dclab/rtdc_dataset/fmt_s3.py +354 -0
- dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
- dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
- dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
- dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
- dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
- dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
- dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
- dclab/rtdc_dataset/load.py +77 -0
- dclab/rtdc_dataset/meta_table.py +25 -0
- dclab/rtdc_dataset/writer.py +1019 -0
- dclab/statistics.py +226 -0
- dclab/util.py +176 -0
- dclab/warn.py +15 -0
- dclab-0.67.0.dist-info/METADATA +153 -0
- dclab-0.67.0.dist-info/RECORD +142 -0
- dclab-0.67.0.dist-info/WHEEL +6 -0
- dclab-0.67.0.dist-info/entry_points.txt +8 -0
- dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
- dclab-0.67.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1019 @@
from __future__ import annotations

from collections.abc import Mapping
import copy
import json
import os
import pathlib
from typing import Dict, List, Literal, Tuple
import warnings

import h5py
import hdf5plugin
import numpy as np

from .. import definitions as dfn
from ..util import hashobj
from .._version import version

from .feat_anc_plugin import PlugInFeature
from .meta_table import MetaTable

#: DEPRECATED (use `CHUNK_SIZE_BYTES` instead)
CHUNK_SIZE = 100

#: Chunk size in bytes for storing HDF5 datasets
CHUNK_SIZE_BYTES = 1024**2  # 1 MiB

#: features that should be written to the output file as uint32 values
FEATURES_UINT32 = [
    "fl1_max",
    "fl1_npeaks",
    "fl2_max",
    "fl2_npeaks",
    "fl3_max",
    "fl3_npeaks",
    "index",
    "ml_class",
    "nevents",
]

#: features that should be written to the output file as uint64 values
FEATURES_UINT64 = [
    "frame",
]


class RTDCWriter:
    def __init__(self,
                 path_or_h5file: str | pathlib.Path | h5py.File,
                 mode: Literal['append', 'replace', 'reset'] = "append",
                 compression_kwargs: Dict | Mapping = None,
                 compression: str = "deprecated"):
        """RT-DC data writer class

        Parameters
        ----------
        path_or_h5file: str or pathlib.Path or h5py.Group
            Path to an HDF5 file or an HDF5 file opened in write mode
        mode: str
            Defines how the data are stored:

            - "append": append new feature data to existing h5py Datasets
            - "replace": replace existing h5py Datasets with new features
              (used for ancillary feature storage)
            - "reset": do not keep any previous data
        compression_kwargs: dict-like
            Dictionary with the keys "compression" and "compression_opts"
            which are passed to :func:`h5py.H5File.create_dataset`. The
            default is Zstandard compression with compression level 5,
            `hdf5plugin.Zstd(clevel=5)`. To disable compression, use
            `{"compression": None}`.
        compression: str or None
            Compression method used for data storage;
            one of [None, "lzf", "gzip", "szip"].

            .. deprecated:: 0.43.0
                Use `compression_kwargs` instead.
        """
        if mode not in ["append", "replace", "reset"]:
            raise ValueError(f"Invalid mode '{mode}'!")
        if compression != "deprecated":
            warnings.warn("The `compression` kwarg is deprecated in favor of "
                          "`compression_kwargs`!",
                          DeprecationWarning)
            if compression_kwargs is not None:
                raise ValueError("You may not specify `compression` and "
                                 "`compression_kwargs` at the same time!")
            # be backwards-compatible
            compression_kwargs = {"compression": compression}
        if compression_kwargs is None:
            compression_kwargs = hdf5plugin.Zstd(clevel=5)

        self.mode = mode
        self.compression_kwargs = compression_kwargs
        if isinstance(path_or_h5file, h5py.Group):
            self.owns_path = False
            self.path = pathlib.Path(path_or_h5file.file.filename)
            self.h5file = path_or_h5file
            if mode == "reset":
                raise ValueError("'reset' mode incompatible with h5py.Group!")
        else:
            self.owns_path = True
            self.path = pathlib.Path(path_or_h5file)
            self.h5file = h5py.File(path_or_h5file,
                                    mode=("w" if mode == "reset" else "a"))
        #: unfortunate necessity, as `len(h5py.Group)` can be really slow
        self._group_sizes = {}

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        # close the HDF5 file
        try:
            self.h5file.require_group("events")
            if len(self.h5file["events"]):
                self.rectify_metadata()
            self.version_brand()
        except BaseException:
            raise
        finally:
            # This is guaranteed to run if any exception is raised.
            self.close()
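
A minimal usage sketch (illustrative only, not part of the packaged source; the file name and values are invented): the writer is typically used as a context manager, so that `rectify_metadata()` and `version_brand()` run automatically on exit.

    import numpy as np
    from dclab import RTDCWriter

    # create a fresh file, store metadata and one scalar feature
    with RTDCWriter("output.rtdc", mode="reset") as hw:
        hw.store_metadata({"experiment": {"sample": "demo",
                                          "run index": 1}})
        hw.store_feature("deform", np.random.uniform(0.01, 0.1, 100))
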
    @staticmethod
    def get_best_nd_chunks(item_shape, item_dtype=np.float64):
        """Return best chunks for HDF5 datasets

        Chunking has performance implications. It is recommended to keep
        the total size of dataset chunks between 10 KiB and 1 MiB. This
        number defines the maximum chunk size as well as half the maximum
        cache size for each dataset.
        """
        # Note that `np.prod(()) == 1`
        event_size = np.prod(item_shape) * np.dtype(item_dtype).itemsize

        chunk_size = CHUNK_SIZE_BYTES / event_size
        # Set minimum chunk size to 10 so that we can have at least some
        # compression performance.
        chunk_size_int = max(10, int(np.floor(chunk_size)))
        return tuple([chunk_size_int] + list(item_shape))
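
To make the heuristic concrete (a quick check, assuming the 1 MiB `CHUNK_SIZE_BYTES` above): a scalar float64 feature occupies 8 bytes per event, so one chunk holds 1048576 / 8 = 131072 events; an 80x250 uint8 image occupies 20000 bytes per event, giving floor(1048576 / 20000) = 52 images per chunk.

    import numpy as np
    from dclab import RTDCWriter

    print(RTDCWriter.get_best_nd_chunks(()))                   # (131072,)
    print(RTDCWriter.get_best_nd_chunks((80, 250), np.uint8))  # (52, 80, 250)
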
    def close(self):
        """Close the underlying HDF5 file if a path was given during init"""
        if self.owns_path:
            self.h5file.close()

    def rectify_metadata(self):
        """Autocomplete the metadata of the RT-DC measurement

        The following configuration keys are updated:

        - experiment:event count
        - fluorescence:samples per event
        - imaging:roi size x (if image or mask is given)
        - imaging:roi size y (if image or mask is given)

        The following configuration keys are added if not present:

        - fluorescence:channel count
        """
        # set event count
        feats = sorted(self.h5file.get("events", {}).keys())
        if feats:
            self.h5file.attrs["experiment:event count"] = len(
                self.h5file["events"][feats[0]])
        else:
            raise ValueError(f"No features in '{self.path}'!")

        # ignore empty features in the checks further below
        for feat in feats[:]:  # iterate over a copy of the list
            obj = self.h5file["events"][feat]
            if ((isinstance(obj, h5py.Dataset) and obj.shape[0] == 0)  # ds
                    or len(obj) == 0):  # groups
                feats.remove(feat)

        # set samples per event
        if "trace" in feats:
            traces = list(self.h5file["events"]["trace"].keys())
            trsize = self.h5file["events"]["trace"][traces[0]].shape[1]
            self.h5file.attrs["fluorescence:samples per event"] = trsize

        # set channel count
        chcount = sum(
            ["fl1_max" in feats, "fl2_max" in feats, "fl3_max" in feats])
        if chcount:
            if "fluorescence:channel count" not in self.h5file.attrs:
                self.h5file.attrs["fluorescence:channel count"] = chcount

        # set roi size x/y
        if "image" in feats:
            shape = self.h5file["events"]["image"][0].shape
        elif "mask" in feats:
            shape = self.h5file["events"]["mask"][0].shape
        else:
            shape = None
        if shape is not None:
            # update shape
            self.h5file.attrs["imaging:roi size x"] = shape[1]
            self.h5file.attrs["imaging:roi size y"] = shape[0]
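
`rectify_metadata()` rarely needs to be called by hand, because `__exit__` invokes it; a sketch of its effect (file name invented):

    import h5py
    import numpy as np
    from dclab import RTDCWriter

    with RTDCWriter("output.rtdc", mode="reset") as hw:
        hw.store_feature("deform", np.linspace(0.01, 0.1, 50))

    with h5py.File("output.rtdc") as h5:
        print(h5.attrs["experiment:event count"])  # 50
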
    def store_basin(self,
                    basin_name: str,
                    basin_type: Literal['file', 'internal', 'remote'],
                    basin_format: str,
                    basin_locs: List[str | pathlib.Path],
                    basin_descr: str | None = None,
                    basin_feats: List[str] = None,
                    basin_map: np.ndarray | Tuple[str, np.ndarray] = None,
                    basin_id: str = None,
                    internal_data: Dict | h5py.Group = None,
                    verify: bool = True,
                    perishable: bool = False,
                    ):
        """Write basin information

        Parameters
        ----------
        basin_name: str
            basin name; Names do not have to be unique.
        basin_type: str
            basin type ("file", "internal", or "remote"); Files are
            paths accessible by the operating system (including e.g.
            network shares) whereas remote locations normally require
            an active internet connection.
        basin_format: str
            The basin format must match the ``format`` property of an
            :class:`.RTDCBase` subclass (e.g. "hdf5" or "dcor")
        basin_locs: list
            the locations of the basin, each a string or (optionally)
            a ``pathlib.Path``
        basin_descr: str
            optional string describing the basin
        basin_feats: list of str
            list of features this basin provides; You may use this to
            restrict access to features for a specific basin.
        basin_map: np.ndarray or tuple of (str, np.ndarray)
            If this is an integer numpy array, it defines the mapping of
            event indices from the basin dataset to the referring dataset
            (the dataset being written to disk). Normally, the basinmap
            feature used for storing the mapping information is inferred
            from the currently defined basinmap features. However, if you
            are incepting basins, then this might not be sufficient, and
            you have to specify explicitly which basinmap feature to use.
            In such a case, you may specify a tuple
            `(feature_name, mapping_array)` where `feature_name` is the
            explicit mapping name, e.g. `"basinmap3"`.
        basin_id: str
            Identifier of the basin. This is the string returned by
            :meth:`.RTDCBase.get_measurement_identifier`. This is
            a unique string that identifies the data within a basin.
            If not specified and `verify=True`, this value is
            automatically taken from the basin file.
        internal_data: dict or instance of h5py.Group
            A dictionary or an `h5py.Group` containing the basin data.
            The data are copied to the "basin_events" group if
            `internal_data` is not an `h5py.Group` in the current HDF5
            file. This must be specified when storing internal basins,
            and it must not be specified for any other basin type.
        verify: bool
            Whether to verify the basin before storing it; You might
            want to set this to False if you would like to write a
            basin that is e.g. temporarily not available
        perishable: bool
            Whether the basin is perishable. If this is True, then a
            warning will be issued, because perishable basins may not
            be accessible later (e.g. a time-based URL for private S3
            data).

        Returns
        -------
        basin_hash: str
            hash of the basin which serves as the name of the HDF5
            dataset stored in the output file

        .. versionadded:: 0.58.0
        """
        if perishable:
            warnings.warn(f"Storing perishable basin {basin_name}")
        if basin_type == "internal":
            if internal_data is None:
                raise ValueError(
                    "When writing an internal basin, you must specify "
                    "`internal_data` which is either a dictionary of numpy "
                    "arrays or an `h5py.Group` containing the relevant "
                    "datasets.")
            if (isinstance(internal_data, dict)
                    or (isinstance(internal_data, h5py.Group)
                        and internal_data.file != self.h5file)):
                # The data are not yet stored in this HDF5 file
                for feat in basin_feats:
                    igroup = self.h5file.require_group("basin_events")
                    if feat in igroup:
                        raise ValueError(f"The feature '{feat}' already "
                                         f"exists in the 'basin_events' "
                                         f"group")
                    self.write_ndarray(group=igroup,
                                       name=feat,
                                       data=internal_data[feat])
                # just override it with the default
                basin_locs = ["basin_events"]
            elif verify:
                # Verify the existence of the data inside this HDF5 file
                if basin_locs != ["basin_events"]:
                    warnings.warn("You specified an uncommon location for "
                                  f"your internal basins: {basin_locs}. "
                                  f"Please use 'basin_events' instead.")
                for feat in basin_feats:
                    if feat not in self.h5file[basin_locs[0]]:
                        raise ValueError(f"Could not find feature '{feat}' "
                                         f"in the group [{basin_locs[0]}]")

        # Expand optional tuple for basin_map
        if isinstance(basin_map, (list, tuple)) and len(basin_map) == 2:
            basin_map_name, basin_map = basin_map
        else:
            basin_map_name = None

        if verify and basin_type in ["file", "remote"]:
            # We have to import this here to avoid circular imports
            from .load import new_dataset
            # Make sure the basin can be opened by dclab, verify its ID
            ref_id = self.h5file.attrs.get("experiment:run identifier")
            for loc in basin_locs:
                with new_dataset(loc) as ds:
                    # We can open the file, which is great.
                    # Compare the IDs.
                    bn_id = ds.get_measurement_identifier()
                    # Check whether `basin_id` matches the actual basin
                    if basin_id:
                        if basin_id != bn_id:
                            raise ValueError(
                                f"Measurement identifier mismatch for "
                                f"{loc}: got {bn_id}, expected {basin_id}!")
                    else:
                        # If `basin_id` was not specified, set it here for
                        # user convenience.
                        basin_id = bn_id or None
                    # Check whether the referrer ID matches the basin ID.
                    if ref_id:
                        if not (bn_id == ref_id
                                or (basin_map is not None
                                    and ref_id.startswith(bn_id))):
                            raise ValueError(
                                f"Measurement identifier mismatch between "
                                f"{self.path} ({ref_id}) "
                                f"and {loc} ({bn_id})!")
        if basin_feats:
            for feat in basin_feats:
                if not dfn.feature_exists(feat):
                    raise ValueError(f"Invalid feature: '{feat}'")
        if basin_map is not None:
            # note: reject anything that is not a uint64 numpy array
            if (not isinstance(basin_map, np.ndarray)
                    or basin_map.dtype != np.uint64):
                raise ValueError(
                    "The array specified in the `basin_map` argument must "
                    "be a numpy array with the dtype `np.uint64`!")

        # determine the basinmap to use
        if basin_map is not None:
            self.h5file.require_group("events")
            if basin_map_name is None:
                # We have to determine the basin_map_name to use for this
                # mapped basin.
                for ii in range(10):  # basinmap0 to basinmap9
                    bm_cand = f"basinmap{ii}"
                    if bm_cand in self.h5file["events"]:
                        # There is a basin mapping defined in the file.
                        # Check whether it is identical to ours.
                        if np.all(
                                self.h5file["events"][bm_cand] == basin_map):
                            # Great, we are done here.
                            basin_map_name = bm_cand
                            break
                        else:
                            # This mapping belongs to a different basin,
                            # try the next mapping.
                            continue
                    else:
                        # The mapping is not defined in the dataset, and
                        # we may write it to a new feature.
                        basin_map_name = bm_cand
                        self.store_feature(feat=basin_map_name,
                                           data=basin_map)
                        break
                else:
                    raise ValueError(
                        "You have exhausted the usage of mapped basins for "
                        "the current dataset. Please revise your analysis "
                        "pipeline.")
            else:
                if basin_map_name not in self.h5file["events"]:
                    # Write the explicit basin mapping into the file.
                    self.store_feature(feat=basin_map_name, data=basin_map)
                elif not np.all(
                        self.h5file["events"][basin_map_name] == basin_map):
                    # This is a sanity check that we have to perform.
                    raise ValueError(
                        f"The basin mapping feature {basin_map_name} you "
                        f"specified explicitly already exists in "
                        f"{self.h5file} and they do not match. I assume "
                        f"you are trying to explicitly write to a basinmap "
                        f"that is already used elsewhere.")
        else:
            # Classic, simple case
            basin_map_name = "same"

        b_data = {
            "description": basin_descr,
            "format": basin_format,
            "name": basin_name,
            "type": basin_type,
            "features": None if basin_feats is None else sorted(basin_feats),
            "mapping": basin_map_name,
            "perishable": perishable,
            "identifier": basin_id,
        }
        if basin_type == "file":
            flocs = []
            for pp in basin_locs:
                pp = pathlib.Path(pp)
                if verify:
                    flocs.append(str(pp.resolve()))
                    # Also store the relative path for user convenience.
                    # Don't use pathlib.Path.relative_to, because that
                    # only has `walk_up` since Python 3.12.
                    # Also, just look in subdirectories which simplifies
                    # path resolution.
                    this_parent = str(self.path.parent) + os.sep
                    path_parent = str(pp.parent) + os.sep
                    if path_parent.startswith(this_parent):
                        flocs.append(str(pp).replace(this_parent, "", 1))
                else:
                    # We already did (or did not upon user request) verify
                    # the path. Just pass it on to the list.
                    flocs.append(str(pp))
            b_data["paths"] = flocs
        elif basin_type == "internal":
            b_data["paths"] = basin_locs
        elif basin_type == "remote":
            b_data["urls"] = [str(p) for p in basin_locs]
        else:
            raise ValueError(f"Unknown basin type '{basin_type}'")

        b_lines = json.dumps(b_data, indent=2, sort_keys=True).split("\n")
        basins = self.h5file.require_group("basins")
        key = hashobj(b_lines)
        if key not in basins:
            self.write_text(basins, key, b_lines)
        return key
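
A hedged sketch of linking a sibling file as a basin (file names invented; with the default `verify=True` the referenced file would have to exist and carry a matching run identifier, so the sketch disables verification):

    import numpy as np
    from dclab import RTDCWriter

    with RTDCWriter("head.rtdc", mode="reset") as hw:
        hw.store_feature("deform", np.zeros(100))
        # link "full.rtdc" as a file basin providing further features
        hw.store_basin(basin_name="raw data",
                       basin_type="file",
                       basin_format="hdf5",
                       basin_locs=["full.rtdc"],
                       basin_descr="original measurement",
                       verify=False)
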
    def store_feature(self, feat, data, shape=None):
        """Write feature data

        Parameters
        ----------
        feat: str
            feature name
        data: np.ndarray or list or dict
            feature data
        shape: tuple of int
            For non-scalar features, this is the shape of the
            feature for one event (e.g. `(90, 250)` for an "image").
            Usually, you do not have to specify this value, but you
            do need it in case of plugin features that don't have
            the "feature shape" set or in case of temporary features.
            If you don't specify it, then the shape is guessed based
            on the data you provide and a UserWarning will be issued.
        """
        if not dfn.feature_exists(feat):
            raise ValueError(f"Undefined feature '{feat}'!")

        events = self.h5file.require_group("events")

        # replace data?
        if feat in events and self.mode == "replace":
            if feat == "trace":
                for tr_name in data.keys():
                    if tr_name in events[feat]:
                        del events[feat][tr_name]
            else:
                del events[feat]

        if feat in FEATURES_UINT32:
            dtype = np.uint32
        elif feat in FEATURES_UINT64:
            dtype = np.uint64
        else:
            dtype = None

        if feat == "index":
            # By design, the index must be a simple enumeration.
            # We enforce that by not trusting the user. If you need
            # a different index, please take a look at the index_online
            # feature.
            nev = len(data)
            if "index" in events:
                nev0 = len(events["index"])
            else:
                nev0 = 0
            self.write_ndarray(group=events,
                               name="index",
                               data=np.arange(nev0 + 1, nev0 + nev + 1),
                               dtype=dtype)
        elif dfn.scalar_feature_exists(feat):
            self.write_ndarray(group=events,
                               name=feat,
                               data=np.atleast_1d(data),
                               dtype=dtype)
        elif feat == "contour":
            self.write_ragged(group=events, name=feat, data=data)
        elif feat in ["image", "image_bg", "mask", "qpi_oah", "qpi_oah_bg"]:
            self.write_image_grayscale(group=events,
                                       name=feat,
                                       data=data,
                                       is_boolean=(feat == "mask"))
        elif feat in ["qpi_amp", "qpi_pha"]:
            self.write_image_float32(group=events,
                                     name=feat,
                                     data=data)
        elif feat == "trace":
            for tr_name in data.keys():
                # verify trace names
                if tr_name not in dfn.FLUOR_TRACES:
                    raise ValueError(f"Unknown trace key: '{tr_name}'!")
                # write trace
                self.write_ndarray(group=events.require_group("trace"),
                                   name=tr_name,
                                   data=np.atleast_2d(data[tr_name]),
                                   dtype=dtype
                                   )
        else:
            if not shape:
                # OK, so we are dealing with a plugin feature or a temporary
                # feature here. Now, we don't know the exact shape of that
                # feature, but we give the user the option to advertise
                # the shape of the feature in the plugin.
                # First, try to obtain the shape from the PluginFeature
                # (if that exists).
                for pf in PlugInFeature.get_instances(feat):
                    if isinstance(pf, PlugInFeature):
                        shape = pf.plugin_feature_info.get("feature shape")
                        if shape is not None:
                            break  # This is good.
                else:
                    # Temporary features will have to live with this warning.
                    warnings.warn(
                        "There is no information about the shape of the "
                        + f"feature '{feat}'. I am going out on a limb "
                        + "for you and assume that you are storing "
                        + "multiple events at a time. If this works, "
                        + f"you could put the shape `{data[0].shape}` "
                        + 'in the `info["feature shapes"]` key of '
                        + "your plugin feature.")
                    shape = data.shape[1:]
            if shape == data.shape:
                data = data.reshape(1, *shape)
            elif shape == data.shape[1:]:
                pass
            else:
                raise ValueError(f"Bad shape for {feat}! Expected {shape}, "
                                 + f"but got {data.shape[1:]}!")
            self.write_ndarray(group=events, name=feat, data=data,
                               dtype=dtype)
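
An illustrative call pattern (shapes and values invented): scalar features take 1D arrays, image-like features take stacks of 2D arrays, and "trace" takes a dict keyed by names from `dclab.definitions.FLUOR_TRACES`.

    import numpy as np
    from dclab import RTDCWriter

    with RTDCWriter("output.rtdc", mode="reset") as hw:
        hw.store_feature("area_um", np.array([90.0, 110.0]))
        hw.store_feature("image", np.zeros((2, 80, 250), dtype=np.uint8))
        hw.store_feature("trace", {"fl1_raw": np.zeros((2, 177))})
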
    def store_log(self, name, lines):
        """Write log data

        Parameters
        ----------
        name: str
            name of the log entry
        lines: list of str or str
            the text lines of the log
        """
        log_group = self.h5file.require_group("logs")
        self.write_text(group=log_group, name=name, lines=lines)

    def store_metadata(self, meta):
        """Store RT-DC metadata

        Parameters
        ----------
        meta: dict-like
            The metadata to store. Each key depicts a metadata section
            name whose data is given as a dictionary, e.g.::

                meta = {"imaging": {"exposure time": 20,
                                    "flash duration": 2,
                                    ...
                                    },
                        "setup": {"channel width": 20,
                                  "chip region": "channel",
                                  ...
                                  },
                        ...
                        }

            Only section key names and key values therein registered
            in dclab are allowed and are converted to the pre-defined
            dtype. Only sections from the
            :const:`dclab.definitions.CFG_METADATA` dictionary are
            stored. If you have custom metadata, you can use the "user"
            section.
        """
        meta = copy.deepcopy(meta)
        # Ignore/remove tdms section
        meta.pop("fmt_tdms", None)
        # Check meta data
        for sec in meta:
            if sec == "user":
                # user-defined metadata are always written.
                # Any errors (incompatibilities with HDF5 attributes)
                # are the user's responsibility
                continue
            elif sec not in dfn.CFG_METADATA:
                # only allow writing of meta data that are not editable
                # by the user (not dclab.dfn.CFG_ANALYSIS)
                raise ValueError(
                    f"Meta data section not defined in dclab: {sec}")
            for ck in meta[sec]:
                if not dfn.config_key_exists(sec, ck):
                    raise ValueError(
                        f"Meta key not defined in dclab: {sec}:{ck}")

        # update version
        old_version = meta.get("setup", {}).get("software version", "")
        new_version = self.version_brand(
            old_version=old_version or None,
            write_attribute=False
        )
        meta.setdefault("setup", {})["software version"] = new_version

        # Write metadata
        for sec in meta:
            for ck in meta[sec]:
                idk = f"{sec}:{ck}"
                value = meta[sec][ck]
                if isinstance(value, bytes):
                    # We never store byte attribute values.
                    # In this case, `convfunc` should be `str` or `lcstr` or
                    # somesuch. But we don't test that, because no other
                    # datatype competes with str for bytes.
                    value = value.decode("utf-8")
                if sec == "user":
                    # store user-defined metadata as-is
                    self.h5file.attrs[idk] = value
                else:
                    # pipe the metadata through the hard-coded converter
                    # functions
                    convfunc = dfn.get_config_value_func(sec, ck)
                    self.h5file.attrs[idk] = convfunc(value)
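
For instance (values invented; keys under "user" are free-form, everything else must be registered in `dclab.definitions.CFG_METADATA`):

    import numpy as np
    from dclab import RTDCWriter

    with RTDCWriter("output.rtdc", mode="reset") as hw:
        hw.store_feature("deform", np.zeros(10))
        hw.store_metadata({
            "imaging": {"pixel size": 0.34},
            "setup": {"channel width": 20, "chip region": "channel"},
            "user": {"operator": "jane"},
        })
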
    def store_table(self, name, cmp_array, h5_attrs=None):
        """Store a compound array table

        Tables are semi-metadata. They may contain information collected
        during a measurement (but with a lower temporal resolution) or
        other tabular data relevant for a dataset. Tables have named
        columns. Therefore, they can be represented as a numpy recarray,
        and they should be stored as such in an HDF5 file (compound
        dataset).

        Parameters
        ----------
        name: str
            Name of the table
        cmp_array: np.recarray, h5py.Dataset, np.ndarray, or dict
            If a np.recarray or h5py.Dataset are provided, then they
            are written as-is to the file. If a dictionary is provided,
            then the dictionary is converted into a numpy recarray.
            If a numpy array is provided, then the array is written
            as a raw table (no column names) to the file.
        h5_attrs: dict, optional
            Attributes to store alongside the corresponding HDF5 dataset
        """
        if h5_attrs is None:
            h5_attrs = {}

        # Convert MetaTable to numpy data
        if isinstance(cmp_array, MetaTable):
            h5_attrs.update(cmp_array.meta)
            cmp_array = cmp_array.__array__()

        # Handle individual cases
        if isinstance(cmp_array, np.recarray):
            # A table is a compound array (np.recarray). If we are here,
            # this means that the user passed an instance of np.recarray.
            pass
        elif isinstance(cmp_array, h5py.Dataset):
            # An instance of h5py.Dataset (which we trust to be a proper
            # compound dataset at this point). No additional steps needed.
            h5_attrs.update(cmp_array.attrs)
        elif isinstance(cmp_array, np.ndarray):
            # A numpy array was passed. This usually means we have
            # something that we can look at, so we add image tags.
            h5_attrs['CLASS'] = np.bytes_('IMAGE')
            h5_attrs['IMAGE_VERSION'] = np.bytes_('1.2')
            h5_attrs['IMAGE_SUBCLASS'] = np.bytes_('IMAGE_GRAYSCALE')
        elif isinstance(cmp_array, dict):
            # The user passed a dict which we now have to convert to a
            # compound dataset. We do this as a convenience to the user.
            # The user should not need to wade through these steps:
            columns = list(cmp_array.keys())
            # Everything should be floats in a table.
            ds_dt = np.dtype({'names': columns,
                              'formats': [np.float64] * len(columns)})
            # We trust the user to provide a dictionary with
            # one-dimensional lists or arrays of the same length.
            tabsize = len(cmp_array[columns[0]])
            tab_data = np.zeros((tabsize, len(columns)))
            for ii, tab in enumerate(columns):
                tab_data[:, ii] = cmp_array[tab]
            # Now create a new compound array (discarding the old dict)
            cmp_array = np.rec.array(tab_data, dtype=ds_dt)
        else:
            raise NotImplementedError(
                f"Cannot convert {type(cmp_array)} to table!")

        # data
        group = self.h5file.require_group("tables")
        tab = group.create_dataset(
            name,
            data=cmp_array,
            fletcher32=True,
            **self.compression_kwargs)

        # metadata
        if h5_attrs:
            tab.attrs.update(h5_attrs)
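
The dictionary input is the most convenient route; a sketch (column names arbitrary, all values coerced to float64 as described above):

    import numpy as np
    from dclab import RTDCWriter

    with RTDCWriter("table.rtdc", mode="reset") as hw:
        hw.store_table("monitoring", {
            "time": np.arange(5, dtype=float),
            "temperature": [22.1, 22.2, 22.1, 22.3, 22.2],
        })
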
    def version_brand(self, old_version=None, write_attribute=True):
        """Perform version branding

        Append a " | dclab X.Y.Z" to the "setup:software version"
        attribute.

        Parameters
        ----------
        old_version: str or None
            By default, the version string is taken from the HDF5 file.
            If set to a string, then this version is used instead.
        write_attribute: bool
            If True (default), write the version string to the
            "setup:software version" attribute
        """
        if old_version is None:
            old_version = self.h5file.attrs.get("setup:software version", "")
        if isinstance(old_version, bytes):
            old_version = old_version.decode("utf-8")
        version_chain = [vv.strip() for vv in old_version.split("|")]
        version_chain = [vv for vv in version_chain if vv]
        cur_version = "dclab {}".format(version)

        if version_chain:
            if version_chain[-1] != cur_version:
                version_chain.append(cur_version)
        else:
            version_chain = [cur_version]
        new_version = " | ".join(version_chain)
        if write_attribute:
            self.h5file.attrs["setup:software version"] = new_version
        else:
            return new_version
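
The resulting attribute is a pipe-separated chain; e.g., assuming this dclab version, branding an acquisition-software string once would yield:

    from dclab import RTDCWriter

    with RTDCWriter("brand.rtdc", mode="reset") as hw:
        chain = hw.version_brand(old_version="ShapeIn 2.0.5",
                                 write_attribute=False)
        print(chain)  # "ShapeIn 2.0.5 | dclab 0.67.0"
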
    def write_image_float32(self, group, name, data):
        """Write 32bit floating point image array

        This function wraps :func:`RTDCWriter.write_ndarray`
        and adds image attributes to the HDF5 file so HDFView
        can display the images properly.

        Parameters
        ----------
        group: h5py.Group
            parent group
        name: str
            name of the dataset containing the image data
        data: np.ndarray or list of np.ndarray
            image data
        """
        if isinstance(data, (list, tuple)):
            # images may be in lists
            data = np.atleast_2d(data)

        if len(data.shape) == 2:
            # put single event in 3D array
            data = data[np.newaxis]

        dset = self.write_ndarray(group=group, name=name, data=data,
                                  dtype=np.float32)

        # Create and set image attributes:
        # HDFView recognizes this as a series of images.
        # Use np.bytes_ as per
        # https://docs.h5py.org/en/stable/strings.html#compatibility
        dset.attrs.create('CLASS', np.bytes_('IMAGE'))
        dset.attrs.create('IMAGE_VERSION', np.bytes_('1.2'))
        dset.attrs.create('IMAGE_SUBCLASS', np.bytes_('IMAGE_GRAYSCALE'))

    def write_image_grayscale(self, group, name, data, is_boolean):
        """Write grayscale image data to an HDF5 dataset

        This function wraps :func:`RTDCWriter.write_ndarray`
        and adds image attributes to the HDF5 file so HDFView
        can display the images properly.

        Parameters
        ----------
        group: h5py.Group
            parent group
        name: str
            name of the dataset containing the image data
        data: np.ndarray or list of np.ndarray
            image data
        is_boolean: bool
            whether the input data is of boolean nature
            (e.g. mask data) - if so, data are converted to uint8
        """
        if isinstance(data, (list, tuple)):
            # images may be in lists
            data = np.atleast_2d(data)

        if len(data.shape) == 2:
            # put single event in 3D array
            data = data.reshape(1, data.shape[0], data.shape[1])

        if is_boolean:
            # convert binary (mask) data to uint8
            if data.__class__.__name__ == "H5MaskEvent":
                # (if we use `isinstance`, we get circular imports)
                # Be smart and directly write back the original data
                # (otherwise we would convert to bool and back to uint8).
                data = data.h5dataset
            elif data.dtype == bool:
                # Convert binary input mask data to uint8 with max range
                data = np.asarray(data, dtype=np.uint8) * 255

        dset = self.write_ndarray(group=group, name=name, data=data,
                                  dtype=np.uint8)

        # Create and set image attributes:
        # HDFView recognizes this as a series of images.
        # Use np.bytes_ as per
        # https://docs.h5py.org/en/stable/strings.html#compatibility
        dset.attrs.create('CLASS', np.bytes_('IMAGE'))
        dset.attrs.create('IMAGE_VERSION', np.bytes_('1.2'))
        dset.attrs.create('IMAGE_SUBCLASS', np.bytes_('IMAGE_GRAYSCALE'))

    def write_ndarray(self, group, name, data, dtype=None):
        """Write n-dimensional array data to an HDF5 dataset

        It is assumed that the shape of the array data is correct,
        i.e. that the shape of `data` is
        (number_events, feat_shape_1, ..., feat_shape_n).

        Parameters
        ----------
        group: h5py.Group
            parent group
        name: str
            name of the dataset
        data: np.ndarray
            data
        dtype: dtype
            the dtype to use for storing the data
            (defaults to `data.dtype`)
        """
        if len(data) == 0:
            raise ValueError(f"Empty data object for '{name}'")

        if name not in group:
            chunks = self.get_best_nd_chunks(item_shape=data.shape[1:],
                                             item_dtype=data.dtype)
            maxshape = tuple([None] + list(data.shape)[1:])
            dset = group.create_dataset(
                name,
                shape=data.shape,
                dtype=dtype or data.dtype,
                maxshape=maxshape,
                chunks=chunks,
                fletcher32=True,
                **self.compression_kwargs)
            offset = 0
        else:
            dset = group[name]
            offset = dset.shape[0]
            dset.resize(offset + data.shape[0], axis=0)
        if len(data.shape) == 1:
            # store scalar data in one go
            dset[offset:] = data
            # store ufunc data for min/max
            for uname, ufunc in [("min", np.nanmin),
                                 ("max", np.nanmax)]:
                val_a = dset.attrs.get(uname, None)
                if val_a is not None:
                    val_b = ufunc(data)
                    val = ufunc([val_a, val_b])
                else:
                    val = ufunc(dset)
                dset.attrs[uname] = val
            # store ufunc data for mean (weighted with size)
            mean_a = dset.attrs.get("mean", None)
            if mean_a is not None:
                num_a = offset
                mean_b = np.nanmean(data)
                num_b = data.size
                mean = (mean_a * num_a + mean_b * num_b) / (num_a + num_b)
            else:
                mean = np.nanmean(dset)
            dset.attrs["mean"] = mean
        else:
            chunk_size = dset.chunks[0]
            # populate higher-dimensional data in chunks
            # (reduces file size, memory usage, and saves time)
            num_chunks = len(data) // chunk_size
            for ii in range(num_chunks):
                start = ii * chunk_size
                stop = start + chunk_size
                dset[offset+start:offset+stop] = data[start:stop]
            # write remainder (if applicable)
            num_remain = len(data) % chunk_size
            if num_remain:
                start_e = num_chunks * chunk_size
                stop_e = start_e + num_remain
                dset[offset+start_e:offset+stop_e] = data[start_e:stop_e]
        return dset
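
The incremental "mean" attribute uses the standard size-weighted update, so appending in pieces matches computing the mean over the concatenated data; a quick numpy-only sanity check of the formula:

    import numpy as np

    a = np.array([1.0, 2.0, 3.0])
    b = np.array([4.0, 5.0])
    mean = (a.mean() * a.size + b.mean() * b.size) / (a.size + b.size)
    assert mean == np.concatenate([a, b]).mean()  # both are 3.0
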
    def write_ragged(self, group, name, data):
        """Write ragged data (i.e. list of arrays of different lengths)

        Ragged array data (e.g. contour data) are stored in
        a separate group and each entry becomes an HDF5 dataset.

        Parameters
        ----------
        group: h5py.Group
            parent group
        name: str
            name of the group that holds the ragged data
        data: list of np.ndarray or np.ndarray
            the data in a list
        """
        if isinstance(data, np.ndarray) and len(data.shape) == 2:
            # place single event in list
            data = [data]
        grp = group.require_group(name)
        # The following case is just a workaround for the very slow
        # `len(grp)` which makes things horrible if you are storing
        # contour data one-by-one. The only downside of this is that
        # we have to keep track of the length of the group. But I
        # think that is OK, since everything is very private here.
        # - Paul (2021-10-18)
        if grp not in self._group_sizes:
            self._group_sizes[grp] = len(grp)
        curid = self._group_sizes[grp]
        for ii, cc in enumerate(data):
            grp.create_dataset("{}".format(curid + ii),
                               data=cc,
                               fletcher32=True,
                               chunks=cc.shape,
                               **self.compression_kwargs)
            self._group_sizes[grp] += 1

    def write_text(self, group, name, lines):
        """Write text to an HDF5 dataset

        Text data are written as a fixed-length string dataset.

        Parameters
        ----------
        group: h5py.Group
            parent group
        name: str
            name of the dataset containing the text
        lines: list of str or str
            the text, line by line
        """
        # replace text?
        if name in group and self.mode == "replace":
            del group[name]

        # handle strings
        if isinstance(lines, (str, bytes)):
            lines = [lines]

        lnum = len(lines)
        # Determine the maximum line length and use fixed-length strings,
        # because compression and fletcher32 filters won't work with
        # variable-length strings.
        # https://github.com/h5py/h5py/issues/1948
        # Start at 100 (the recommended default), because when `mode` is
        # e.g. "append", the longest line may already be part of the
        # existing dataset.
        max_length = 100
        lines_as_bytes = []
        for line in lines:
            # convert lines to bytes
            if not isinstance(line, bytes):
                lbytes = line.encode("UTF-8")
            else:
                lbytes = line
            max_length = max(max_length, len(lbytes))
            lines_as_bytes.append(lbytes)

        if name not in group:
            # Create the dataset
            txt_dset = group.create_dataset(
                name,
                shape=(lnum,),
                dtype=f"S{max_length}",
                maxshape=(None,),
                chunks=True,
                fletcher32=True,
                **self.compression_kwargs)
            line_offset = 0
        else:
            # TODO: test whether fixed length is long enough!
            # Resize the dataset
            txt_dset = group[name]
            line_offset = txt_dset.shape[0]
            txt_dset.resize(line_offset + lnum, axis=0)

        # Write the text data line-by-line
        for ii, lbytes in enumerate(lines_as_bytes):
            txt_dset[line_offset + ii] = lbytes