dclab 0.67.0__cp314-cp314t-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic. Click here for more details.

Files changed (142)
  1. dclab/__init__.py +41 -0
  2. dclab/_version.py +34 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +182 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cpython-314t-darwin.so +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cpython-314t-darwin.so +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cpython-314t-darwin.so +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cpython-314t-darwin.so +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +260 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde/__init__.py +1 -0
  73. dclab/kde/base.py +459 -0
  74. dclab/kde/contours.py +222 -0
  75. dclab/kde/methods.py +313 -0
  76. dclab/kde_contours.py +10 -0
  77. dclab/kde_methods.py +11 -0
  78. dclab/lme4/__init__.py +5 -0
  79. dclab/lme4/lme4_template.R +94 -0
  80. dclab/lme4/rsetup.py +204 -0
  81. dclab/lme4/wrapr.py +386 -0
  82. dclab/polygon_filter.py +398 -0
  83. dclab/rtdc_dataset/__init__.py +15 -0
  84. dclab/rtdc_dataset/check.py +902 -0
  85. dclab/rtdc_dataset/config.py +533 -0
  86. dclab/rtdc_dataset/copier.py +353 -0
  87. dclab/rtdc_dataset/core.py +896 -0
  88. dclab/rtdc_dataset/export.py +867 -0
  89. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  91. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  92. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  93. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  94. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  95. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  96. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  97. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  98. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  99. dclab/rtdc_dataset/feat_basin.py +762 -0
  100. dclab/rtdc_dataset/feat_temp.py +102 -0
  101. dclab/rtdc_dataset/filter.py +263 -0
  102. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  103. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  104. dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
  105. dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
  106. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  107. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  108. dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
  109. dclab/rtdc_dataset/fmt_dict.py +103 -0
  110. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  111. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  112. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  113. dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
  114. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  115. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  116. dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  118. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  119. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  120. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  121. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  122. dclab/rtdc_dataset/fmt_http.py +102 -0
  123. dclab/rtdc_dataset/fmt_s3.py +354 -0
  124. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  125. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  126. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  127. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  128. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  129. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  130. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  131. dclab/rtdc_dataset/load.py +77 -0
  132. dclab/rtdc_dataset/meta_table.py +25 -0
  133. dclab/rtdc_dataset/writer.py +1019 -0
  134. dclab/statistics.py +226 -0
  135. dclab/util.py +176 -0
  136. dclab/warn.py +15 -0
  137. dclab-0.67.0.dist-info/METADATA +153 -0
  138. dclab-0.67.0.dist-info/RECORD +142 -0
  139. dclab-0.67.0.dist-info/WHEEL +6 -0
  140. dclab-0.67.0.dist-info/entry_points.txt +8 -0
  141. dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
  142. dclab-0.67.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,192 @@
1
+ """RT-DC hdf5 format"""
2
+ from __future__ import annotations
3
+
4
+ import io
5
+ import json
6
+ import pathlib
7
+ from typing import Any, BinaryIO, Dict
8
+ import warnings
9
+
10
+ import h5py
11
+
12
+ from ...external.packaging import parse as parse_version
13
+ from ...util import hashobj, hashfile
14
+
15
+ from ..config import Configuration
16
+ from ..core import RTDCBase
17
+
18
+ from . import events
19
+ from . import logs
20
+ from . import tables
21
+
22
+ #: rtdc files exported with dclab prior to this version are not supported
23
+ MIN_DCLAB_EXPORT_VERSION = "0.3.3.dev2"
24
+
25
+
26
class OldFormatNotSupportedError(Exception):
    """Raised when an .rtdc file was exported with an unsupported dclab

    Files written by dclab versions older than
    ``MIN_DCLAB_EXPORT_VERSION`` trigger this error on load.

    Bugfix: derive from :class:`Exception` instead of ``BaseException``.
    PEP 8 mandates that user-defined exceptions inherit from
    ``Exception`` so that generic ``except Exception`` error handling
    (e.g. in GUI applications) can catch them; ``BaseException`` is
    reserved for exit-style exceptions such as ``KeyboardInterrupt``.
    """
    pass
28
+
29
+
30
class UnknownKeyWarning(UserWarning):
    """Warning category issued for unknown keys encountered during
    parsing of an .rtdc file."""
32
+
33
+
34
class RTDC_HDF5(RTDCBase):
    def __init__(self,
                 h5path: str | pathlib.Path | BinaryIO | io.IOBase,
                 h5kwargs: Dict[str, Any] = None,
                 *args,
                 **kwargs):
        """HDF5 file format for RT-DC measurements

        Parameters
        ----------
        h5path: str or pathlib.Path or file-like object
            Path to an '.rtdc' measurement file or a file-like object
        h5kwargs: dict
            Additional keyword arguments given to :class:`h5py.File`
        *args:
            Arguments for `RTDCBase`
        **kwargs:
            Keyword arguments for `RTDCBase`

        Attributes
        ----------
        path: pathlib.Path
            Path to the experimental HDF5 (.rtdc) file

        Raises
        ------
        OldFormatNotSupportedError
            If the file was exported with a dclab version older than
            ``MIN_DCLAB_EXPORT_VERSION``.
        """
        super(RTDC_HDF5, self).__init__(*args, **kwargs)

        # Any subclass of RTDC_HDF5 is probably a remote-type and should
        # not be able to access local basins. If you do not agree, please
        # enable this in the definition of the subclass.
        self._local_basins_allowed = self.format == "hdf5"

        # Normalize string paths; file-like objects are handed to h5py
        # unchanged. (The former no-op `else: h5path = h5path` branch
        # was removed.)
        if isinstance(h5path, (str, pathlib.Path)):
            h5path = pathlib.Path(h5path)

        self._hash = None  # lazy cache for the `hash` property
        self.path = h5path

        # Increase the read cache (which defaults to 1MiB), since
        # normally we have around 2.5MiB image chunks.
        if h5kwargs is None:
            h5kwargs = {}
        h5kwargs.setdefault("rdcc_nbytes", 10 * 1024 ** 2)
        h5kwargs.setdefault("rdcc_w0", 0)

        self.h5kwargs = h5kwargs
        self.h5file = h5py.File(h5path, **h5kwargs)

        self._events = events.H5Events(self.h5file)

        # Parse configuration
        self.config = RTDC_HDF5.parse_config(self.h5file)

        # Override the logs and tables properties with HDF5 data
        self.logs = logs.H5Logs(self.h5file)
        self.tables = tables.H5Tables(self.h5file)

        # Reject files exported by unsupported (too old) dclab versions.
        rtdc_soft = self.config["setup"].get("software version", "unknown")
        if rtdc_soft.startswith("dclab "):
            rtdc_ver = parse_version(rtdc_soft.split(" ")[1])
            if rtdc_ver < parse_version(MIN_DCLAB_EXPORT_VERSION):
                msg = "The file {} was created ".format(self.path) \
                      + "with dclab {} which is ".format(rtdc_ver) \
                      + "not supported anymore! Please rerun " \
                      + "dclab-tdms2rtdc / export the data again."
                raise OldFormatNotSupportedError(msg)

        self.title = "{} - M{}".format(
            self.config["experiment"].get("sample", "undefined sample"),
            self.config["experiment"].get("run index", "0"))

    def close(self):
        """Close the underlying HDF5 file"""
        super(RTDC_HDF5, self).close()
        self.h5file.close()

    @property
    def _h5(self):
        # deprecated private alias for the public `h5file` attribute
        warnings.warn("Access to the underlying HDF5 file is now public. "
                      "Please use the `h5file` attribute instead of `_h5`!",
                      DeprecationWarning)
        return self.h5file

    @staticmethod
    def can_open(h5path):
        """Check whether a given file is in the .rtdc file format"""
        h5path = pathlib.Path(h5path)
        if h5path.suffix == ".rtdc":
            return True
        # We don't know the extension; check for the "events" group.
        canopen = False
        try:
            with h5path.open("rb") as fd:
                # Bugfix: close the h5py.File again via a context
                # manager (the handle used to be leaked here).
                with h5py.File(fd, "r") as h5:
                    canopen = "events" in h5
        except OSError:
            # not an HDF5 file (or file not readable)
            pass
        return canopen

    @staticmethod
    def parse_config(h5path):
        """Parse the RT-DC configuration of an HDF5 file

        `h5path` may be a h5py.File object or an actual path
        """
        if isinstance(h5path, h5py.File):
            h5attrs = dict(h5path.attrs)
        else:
            with h5py.File(h5path, mode="r") as fh5:
                h5attrs = dict(fh5.attrs)

        # Convert byte strings to unicode strings
        # https://github.com/h5py/h5py/issues/379
        for key in h5attrs:
            if isinstance(h5attrs[key], bytes):
                h5attrs[key] = h5attrs[key].decode("utf-8")

        config = Configuration()
        for key in h5attrs:
            # Split on the first colon only, so parameter names that
            # themselves contain ":" do not raise a ValueError.
            section, pname = key.split(":", 1)
            config[section][pname] = h5attrs[key]
        return config

    @property
    def hash(self):
        """Hash value based on file name and content"""
        if self._hash is None:
            tohash = [self.path.name,
                      # Hash a maximum of ~1MB of the hdf5 file
                      hashfile(self.path, blocksize=65536, count=20)]
            self._hash = hashobj(tohash)
        return self._hash

    def basins_get_dicts(self):
        """Return list of dicts for all basins defined in `self.h5file`"""
        return self.basin_get_dicts_from_h5file(self.h5file)

    @staticmethod
    def basin_get_dicts_from_h5file(h5file):
        """Return list of dicts for all basins defined in `h5file`"""
        basins = []
        # Do not sort anything here, sorting is done in `RTDCBase`.
        for bk in h5file.get("basins", []):
            bdat = list(h5file["basins"][bk])
            if isinstance(bdat[0], bytes):
                bdat = [bi.decode("utf") for bi in bdat]
            bdict = json.loads(" ".join(bdat))
            # remember under which key the basin was stored
            bdict["key"] = bk
            basins.append(bdict)
        return basins
@@ -0,0 +1,30 @@
1
+ """RT-DC hdf5 format"""
2
+ from __future__ import annotations
3
+
4
+ import pathlib
5
+
6
+ from .. import feat_basin
7
+
8
+ from .base import RTDC_HDF5
9
+
10
+
11
class HDF5Basin(feat_basin.Basin):
    """Basin referring to an HDF5 (.rtdc) file"""
    basin_format = "hdf5"
    basin_type = "file"

    def __init__(self, *args, **kwargs):
        # cached result of `is_available`; None means "not checked yet"
        self._available_verified = None
        super(HDF5Basin, self).__init__(*args, **kwargs)

    def _load_dataset(self, location, **kwargs):
        return RTDC_HDF5(location, **kwargs)

    def is_available(self):
        """Check (and cache) whether the basin file exists"""
        if self._available_verified is None:
            with self._av_check_lock:
                try:
                    verified = pathlib.Path(self.location).exists()
                except OSError:
                    # e.g. malformed location; leave the cache unset
                    pass
                else:
                    self._available_verified = verified
        return self._available_verified
@@ -0,0 +1,276 @@
1
+ """RT-DC hdf5 format"""
2
+ from __future__ import annotations
3
+
4
+ import pathlib
5
+ import warnings
6
+
7
+ import numbers
8
+ import numpy as np
9
+
10
+ from ... import definitions as dfn
11
+ from ...util import copy_if_needed
12
+
13
+ from . import feat_defect
14
+
15
+
16
class H5ContourEvent:
    """Access to the variable-length "contour" feature of an HDF5 file

    Each event's contour is stored as an individual dataset named
    "0", "1", ... inside the contour group (contours have varying
    numbers of points, so they cannot share one rectangular dataset).
    """

    def __init__(self, h5group, length=None):
        """
        Parameters
        ----------
        h5group: h5py.Group
            the "contour" group containing one dataset per event
        length: int or None
            number of events; if None, it is determined lazily from
            `len(h5group)` (which is slow)
        """
        self._length = length
        self.h5group = h5group
        # Assemble the identifier used for hashing in util.obj2bytes.
        # Bugfix: the original code had a trailing comma after
        # `h5group["0"].name`, accidentally turning the HDF5 path into
        # a 1-tuple inside the identifier.
        # path of the first contour within the HDF5 file
        o_name = h5group["0"].name
        # filename
        o_filename = h5group.file.filename
        _data = [o_name, o_filename]
        if pathlib.Path(o_filename).exists():
            # stat the file once (the original stat()ed it twice)
            st = pathlib.Path(o_filename).stat()
            # when the file was changed
            _data.append(st.st_mtime)
            # size of the file
            _data.append(st.st_size)
        self.identifier = _data

    def __getitem__(self, key):
        """Return one contour for integer keys, a list for slices"""
        if not isinstance(key, numbers.Integral):
            # slicing: resolve the requested indices, then fetch each
            # contour dataset individually
            indices = np.arange(len(self))[key]
            return [self.h5group[str(evid)][:] for evid in indices]
        elif key < 0:
            # negative indexing counts from the end
            return self.__getitem__(key + len(self))
        else:
            return self.h5group[str(key)][:]

    def __iter__(self):
        for idx in range(len(self)):
            yield self[idx]

    def __len__(self):
        if self._length is None:
            # computing the length of an H5Group is slow
            self._length = len(self.h5group)
        return self._length

    @property
    def dtype(self):
        return self.h5group["0"].dtype

    @property
    def shape(self):
        # the number of points per contour varies, hence np.nan
        return len(self), np.nan, 2
64
+
65
+
66
class H5Events:
    """Dict-like access to the feature data stored in an HDF5 file"""

    def __init__(self, h5):
        self.h5file = h5
        # According to https://github.com/h5py/h5py/issues/1960, we always
        # have to keep a reference to the HDF5 dataset, otherwise it will
        # be garbage-collected immediately. In addition to caching the HDF5
        # datasets, we cache the wrapping classes in `self._cached_events`.
        self._cached_events = {}
        self._defective_features = {}
        self._features_list = None

    @property
    def _features(self):
        """Sorted names of everything below the "events" group"""
        if self._features_list is None:
            feats = sorted(self.h5file["events"].keys())
            self._features_list = feats
            # an empty "trace" group does not count as a feature
            if "trace" in feats and len(self.h5file["events"]["trace"]) == 0:
                feats.remove("trace")
        return self._features_list

    def __contains__(self, key):
        return key in self.keys()

    def __getitem__(self, key):
        if key in self._cached_events:
            return self._cached_events[key]
        # user-level checking is done in core.py
        assert dfn.feature_exists(key), f"Feature '{key}' does not exist!"
        data = self.h5file["events"][key]
        if key == "contour":
            count = self.h5file.attrs.get("experiment:event count")
            wrapped = H5ContourEvent(data, length=count)
        elif key == "mask":
            wrapped = H5MaskEvent(data)
        elif key == "trace":
            wrapped = H5TraceEvent(data)
        elif data.ndim == 1:
            wrapped = H5ScalarEvent(data)
        else:
            # non-scalar features such as "image", "image_bg" and other
            # non-scalar ancillary features are handed out as-is
            wrapped = data
        self._cached_events[key] = wrapped
        return wrapped

    def __iter__(self):
        # dict-like behavior
        yield from self.keys()

    def _is_defective_feature(self, feat):
        """Whether the stored feature is defective"""
        if feat not in self._defective_features:
            defective = False
            if feat in feat_defect.DEFECTIVE_FEATURES \
                    and feat in self._features:
                # feature exists in the HDF5 file; run the workaround
                # machinery for sorting out defective features
                defective = feat_defect.DEFECTIVE_FEATURES[feat](self.h5file)
            self._defective_features[feat] = defective
        return self._defective_features[feat]

    def keys(self):
        """Returns list of valid features

        Checks for
        - defective features: whether the data in the HDF5 file is invalid
        - existing feature names: dynamic, depending on e.g. plugin features
        """
        return [key for key in self._features
                if dfn.feature_exists(key)
                and not self._is_defective_feature(key)]
142
+
143
+
144
class H5MaskEvent:
    """Cast uint8 masks to boolean"""

    def __init__(self, h5dataset):
        self.h5dataset = h5dataset
        # identifier required because "mask" is used for computation
        # of ancillary feature "contour".
        self.identifier = (self.h5dataset.file.filename, self.h5dataset.name)
        # advertised dtype; the cast happens on item access
        self.dtype = np.dtype(bool)

    def __array__(self, dtype=np.bool_, copy=copy_if_needed, *args, **kwargs):
        # One of the reasons why we implement __array__ is such that
        # the data exporter knows this object is sliceable
        # (see yield_filtered_array_stacks).
        if dtype is not np.uint8:
            warnings.warn("Please avoid calling the `__array__` method of the "
                          "`H5MaskEvent`. It may consume a lot of memory.",
                          UserWarning)
        return self.h5dataset.__array__(dtype=dtype, *args, **kwargs)

    def __getitem__(self, idx):
        # cast the stored uint8 data to boolean on access
        return np.asarray(self.h5dataset[idx], dtype=bool)

    def __iter__(self):
        return (self[ii] for ii in range(len(self)))

    def __len__(self):
        return len(self.h5dataset)

    @property
    def attrs(self):
        return self.h5dataset.attrs

    @property
    def shape(self):
        return self.h5dataset.shape

    @property
    def size(self):
        return np.prod(self.shape)
185
+
186
+
187
class H5ScalarEvent(np.lib.mixins.NDArrayOperatorsMixin):
    def __init__(self, h5ds):
        """Lazy access to a scalar feature with cache"""
        self.h5ds = h5ds
        # for hashing in util.obj2bytes
        self.identifier = (self.h5ds.file.filename, self.h5ds.name)
        self._array = None  # filled on first __array__ call
        self.ndim = 1  # matplotlib might expect this from an array
        # copy of the HDF5 attrs, doubles as cache for ufunc values
        self._ufunc_attrs = dict(self.h5ds.attrs)

    def __array__(self, dtype=None, copy=copy_if_needed, *args, **kwargs):
        if self._array is None:
            # read the data from disk only once
            self._array = np.asarray(self.h5ds, *args, **kwargs)
        return np.array(self._array, dtype=dtype, copy=copy)

    def __getitem__(self, idx):
        return self.__array__()[idx]

    def __len__(self):
        return len(self.h5ds)

    def _fetch_ufunc_attr(self, uname, ufunc):
        """A wrapper for calling functions on the scalar feature data

        The ideas are:

        1. If there is a ufunc (max/mean/min) value stored in the dataset
           attributes, then use this one.
        2. If the ufunc is computed, it is cached permanently in
           self._ufunc_attrs
        """
        value = self._ufunc_attrs.get(uname, None)
        if value is None:
            value = ufunc(self.__array__())
            self._ufunc_attrs[uname] = value
        return value

    def max(self, *args, **kwargs):
        return self._fetch_ufunc_attr("max", np.nanmax)

    def mean(self, *args, **kwargs):
        return self._fetch_ufunc_attr("mean", np.nanmean)

    def min(self, *args, **kwargs):
        return self._fetch_ufunc_attr("min", np.nanmin)

    @property
    def dtype(self):
        return self.h5ds.dtype

    @property
    def shape(self):
        return self.h5ds.shape

    @property
    def size(self):
        return len(self)
245
+
246
+
247
class H5TraceEvent:
    """Dict-like access to the "trace" group of an HDF5 file"""

    def __init__(self, h5group):
        self.h5group = h5group
        self._num_traces = None  # lazy cache for __len__
        self._shape = None  # lazy cache for shape

    def __getitem__(self, idx):
        return self.h5group[idx]

    def __contains__(self, item):
        return item in self.h5group

    def __len__(self):
        if self._num_traces is None:
            self._num_traces = len(self.h5group)
        return self._num_traces

    def __iter__(self):
        yield from sorted(self.h5group.keys())

    def keys(self):
        return self.h5group.keys()

    @property
    def shape(self):
        if self._shape is None:
            # all traces share one shape; look at the first one
            first = next(iter(self.h5group.keys()))
            self._shape = (len(self),) + tuple(self.h5group[first].shape)
        return self._shape
@@ -0,0 +1,164 @@
1
+ """RT-DC hdf5 format"""
2
+ from __future__ import annotations
3
+
4
+ from ...external.packaging import parse as parse_version
5
+
6
+
7
def get_software_version_from_h5(h5):
    """Return the "setup:software version" attribute as `str`

    Byte-string attributes are decoded to unicode; a missing
    attribute yields an empty string.
    """
    version = h5.attrs.get("setup:software version", "")
    if isinstance(version, bytes):
        version = version.decode("utf-8")
    return version
12
+
13
+
14
def is_defective_feature_aspect(h5):
    """In Shape-In 2.0.6, there was a wrong variable cast"""
    return get_software_version_from_h5(h5) in ("ShapeIn 2.0.6",
                                                "ShapeIn 2.0.7")
18
+
19
+
20
def is_defective_feature_time(h5):
    """Shape-In stores the "time" feature as a low-precision float32

    This makes time resolution for large measurements useless,
    because times are only resolved with four digits after the
    decimal point. Here, we first check whether the "frame" feature
    and the [imaging]:"frame rate" configuration are set. If so,
    then we can compute "time" as an ancillary feature which will
    be more accurate than its float32 version.
    """
    # Without the "frame" feature and a nonzero [imaging]:"frame rate"
    # the ancillary feature cannot be computed, so we must not ignore
    # even inaccurate stored information.
    if "frame" not in h5["events"]:
        return False
    if h5.attrs.get("imaging:frame rate", 0) == 0:
        return False

    # If we have a 32 bit float dataset, then things are pretty clear.
    if h5["events/time"].dtype.char[-1] == "f":
        return True

    # Consider the software
    software_version = get_software_version_from_h5(h5)

    # Only Shape-In stores false data, so we can ignore other recording
    # software.
    if not software_version.count("ShapeIn"):
        return False

    # The tricky part: dclab might have analyzed the dataset recorded by
    # Shape-In, e.g. in a compression step. Since dclab appends its version
    # string to the software_version, we just have to parse that and make
    # sure that it is above 0.47.6.
    final = software_version.split("|")[-1].strip()
    if final.startswith("dclab"):
        if parse_version(final.split()[1]) < parse_version("0.47.6"):
            # written with an older version of dclab
            return True

    # We covered all cases:
    # - ancillary information are available
    # - it's not a float32 dataset
    # - we excluded all non-Shape-In recording software
    # - it was not written with an older version of dclab
    return False
68
+
69
+
70
def is_defective_feature_volume(h5):
    """dclab computed volume wrong up until version 0.36.1"""
    # the scripted fix (dclab issue #141) leaves a log entry
    if "dclab_issue_141" in list(h5.get("logs", {}).keys()):
        return False
    # if that does not apply, check the software version
    version_string = get_software_version_from_h5(h5)
    if version_string:
        final = version_string.split("|")[-1].strip()
        if final.startswith("dclab"):
            return (parse_version(final.split()[1])
                    < parse_version("0.37.0"))
    return False
84
+
85
+
86
def is_defective_feature_inert_ratio(h5):
    """For long channels, there was an integer overflow until 0.48.1

    The problem here is that not only the channel length, but also
    the length of the contour play a role. All point coordinates of
    the contour are summed up and multiplied with one another which
    leads to integer overflows when computing mu20.

    Thus, this test is only a best guess, but still quite fast.

    See also https://github.com/DC-analysis/dclab/issues/212
    """
    # Only images wider than 500 px are affected. If this file was
    # written with dclab, then we always have the ROI size, so we
    # don't have to check the actual image.
    if h5.attrs.get("imaging:roi size x", 0) <= 500:
        return False

    # determine whether the software version was outdated
    software_version = get_software_version_from_h5(h5)
    if not software_version:
        return False
    pipeline = [v.strip() for v in software_version.split("|")]
    final = pipeline[-1]
    if not final.startswith("dclab"):
        return False
    # The fix was implemented in 0.48.2, but this method here
    # was only implemented in 0.48.3, so we might have leaked
    # old data into new files.
    return parse_version(final.split()[1]) < parse_version("0.48.3")
117
+
118
+
119
def is_defective_feature_inert_ratio_raw_cvx(h5):
    """Additional check for `inert_ratio_raw` and `inert_ratio_cvx`

    These features were computed with Shape-In and were very likely
    computed correctly.

    See https://github.com/DC-analysis/dclab/issues/224
    """
    if not is_defective_feature_inert_ratio(h5):
        # not in the affected regime
        return False

    # Possibly affected. Only return False if Shape-In check is negative
    software_version = get_software_version_from_h5(h5)
    first_version = [v.strip() for v in software_version.split("|")][0]
    if first_version.startswith("ShapeIn"):
        si_version = first_version.split()[1]
    elif "shapein-acquisition" in h5.get("logs", []):
        # Later versions of Shape-In do not anymore write "ShapeIn" in the
        # version string.
        si_version = first_version
    else:
        # Some other software was used to record the data and dclab
        # very likely stored the wrong inertia ratio.
        return True

    # We trust Shape-In >= 2.0.5
    return parse_version(si_version) < parse_version("2.0.5")
150
+
151
+
152
#: Dictionary of defective-feature checks. Each key is a feature name
#: and each value is a callable that takes the open HDF5 file and
#: returns whether the stored data for that feature are defective
#: (and should therefore not be used as-is).
DEFECTIVE_FEATURES = {
    # feature name -> check function called with the HDF5 file
    "aspect": is_defective_feature_aspect,
    "inert_ratio_cvx": is_defective_feature_inert_ratio_raw_cvx,
    "inert_ratio_prnc": is_defective_feature_inert_ratio,
    "inert_ratio_raw": is_defective_feature_inert_ratio_raw_cvx,
    "tilt": is_defective_feature_inert_ratio,
    "time": is_defective_feature_time,
    "volume": is_defective_feature_volume,
}
@@ -0,0 +1,33 @@
1
class H5Logs:
    """Dict-like access to the (non-empty) logs of an HDF5 file"""

    def __init__(self, h5):
        self.h5file = h5
        self._cache_keys = None  # lazily computed list of log names

    def __getitem__(self, key):
        if key not in self.keys():
            raise KeyError(
                f"File {self.h5file.file.filename} does not have the log "
                f"'{key}'. Available logs are {self.keys()}.")
        lines = list(self.h5file["logs"][key])
        if isinstance(lines[0], bytes):
            lines = [item.decode("utf") for item in lines]
        return lines

    def __iter__(self):
        # dict-like behavior
        yield from self.keys()

    def __len__(self):
        return len(self.keys())

    def keys(self):
        """Names of all logs that contain at least one line"""
        if self._cache_keys is None:
            self._cache_keys = [
                key for key in self.h5file.get("logs", {})
                if self.h5file["logs"][key].size]
        return self._cache_keys