dclab 0.62.11__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dclab might be problematic. Click here for more details.
- dclab/__init__.py +23 -0
- dclab/_version.py +16 -0
- dclab/cached.py +97 -0
- dclab/cli/__init__.py +10 -0
- dclab/cli/common.py +237 -0
- dclab/cli/task_compress.py +126 -0
- dclab/cli/task_condense.py +223 -0
- dclab/cli/task_join.py +229 -0
- dclab/cli/task_repack.py +98 -0
- dclab/cli/task_split.py +154 -0
- dclab/cli/task_tdms2rtdc.py +186 -0
- dclab/cli/task_verify_dataset.py +75 -0
- dclab/definitions/__init__.py +79 -0
- dclab/definitions/feat_const.py +202 -0
- dclab/definitions/feat_logic.py +183 -0
- dclab/definitions/meta_const.py +252 -0
- dclab/definitions/meta_logic.py +111 -0
- dclab/definitions/meta_parse.py +94 -0
- dclab/downsampling.cp313-win_amd64.pyd +0 -0
- dclab/downsampling.pyx +230 -0
- dclab/external/__init__.py +4 -0
- dclab/external/packaging/LICENSE +3 -0
- dclab/external/packaging/LICENSE.APACHE +177 -0
- dclab/external/packaging/LICENSE.BSD +23 -0
- dclab/external/packaging/__init__.py +6 -0
- dclab/external/packaging/_structures.py +61 -0
- dclab/external/packaging/version.py +505 -0
- dclab/external/skimage/LICENSE +28 -0
- dclab/external/skimage/__init__.py +2 -0
- dclab/external/skimage/_find_contours.py +216 -0
- dclab/external/skimage/_find_contours_cy.cp313-win_amd64.pyd +0 -0
- dclab/external/skimage/_find_contours_cy.pyx +188 -0
- dclab/external/skimage/_pnpoly.cp313-win_amd64.pyd +0 -0
- dclab/external/skimage/_pnpoly.pyx +99 -0
- dclab/external/skimage/_shared/__init__.py +1 -0
- dclab/external/skimage/_shared/geometry.cp313-win_amd64.pyd +0 -0
- dclab/external/skimage/_shared/geometry.pxd +6 -0
- dclab/external/skimage/_shared/geometry.pyx +55 -0
- dclab/external/skimage/measure.py +7 -0
- dclab/external/skimage/pnpoly.py +53 -0
- dclab/external/statsmodels/LICENSE +35 -0
- dclab/external/statsmodels/__init__.py +6 -0
- dclab/external/statsmodels/nonparametric/__init__.py +1 -0
- dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
- dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
- dclab/external/statsmodels/nonparametric/kernels.py +36 -0
- dclab/features/__init__.py +9 -0
- dclab/features/bright.py +81 -0
- dclab/features/bright_bc.py +93 -0
- dclab/features/bright_perc.py +63 -0
- dclab/features/contour.py +161 -0
- dclab/features/emodulus/__init__.py +339 -0
- dclab/features/emodulus/load.py +252 -0
- dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
- dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
- dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
- dclab/features/emodulus/pxcorr.py +135 -0
- dclab/features/emodulus/scale_linear.py +247 -0
- dclab/features/emodulus/viscosity.py +256 -0
- dclab/features/fl_crosstalk.py +95 -0
- dclab/features/inert_ratio.py +377 -0
- dclab/features/volume.py +242 -0
- dclab/http_utils.py +322 -0
- dclab/isoelastics/__init__.py +468 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
- dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
- dclab/kde_contours.py +222 -0
- dclab/kde_methods.py +303 -0
- dclab/lme4/__init__.py +5 -0
- dclab/lme4/lme4_template.R +94 -0
- dclab/lme4/rsetup.py +204 -0
- dclab/lme4/wrapr.py +386 -0
- dclab/polygon_filter.py +398 -0
- dclab/rtdc_dataset/__init__.py +15 -0
- dclab/rtdc_dataset/check.py +902 -0
- dclab/rtdc_dataset/config.py +533 -0
- dclab/rtdc_dataset/copier.py +353 -0
- dclab/rtdc_dataset/core.py +1001 -0
- dclab/rtdc_dataset/export.py +737 -0
- dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
- dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
- dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
- dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
- dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
- dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
- dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
- dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
- dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
- dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
- dclab/rtdc_dataset/feat_basin.py +550 -0
- dclab/rtdc_dataset/feat_temp.py +102 -0
- dclab/rtdc_dataset/filter.py +263 -0
- dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
- dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
- dclab/rtdc_dataset/fmt_dcor/api.py +111 -0
- dclab/rtdc_dataset/fmt_dcor/base.py +200 -0
- dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
- dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
- dclab/rtdc_dataset/fmt_dcor/tables.py +42 -0
- dclab/rtdc_dataset/fmt_dict.py +103 -0
- dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
- dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
- dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
- dclab/rtdc_dataset/fmt_hdf5/events.py +257 -0
- dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
- dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
- dclab/rtdc_dataset/fmt_hdf5/tables.py +30 -0
- dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
- dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
- dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
- dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
- dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
- dclab/rtdc_dataset/fmt_http.py +102 -0
- dclab/rtdc_dataset/fmt_s3.py +320 -0
- dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
- dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
- dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
- dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
- dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
- dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
- dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
- dclab/rtdc_dataset/load.py +72 -0
- dclab/rtdc_dataset/writer.py +985 -0
- dclab/statistics.py +203 -0
- dclab/util.py +156 -0
- dclab/warn.py +15 -0
- dclab-0.62.11.dist-info/LICENSE +343 -0
- dclab-0.62.11.dist-info/METADATA +146 -0
- dclab-0.62.11.dist-info/RECORD +137 -0
- dclab-0.62.11.dist-info/WHEEL +5 -0
- dclab-0.62.11.dist-info/entry_points.txt +8 -0
- dclab-0.62.11.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,550 @@
|
|
|
1
|
+
"""
|
|
2
|
+
With basins, you can create analysis pipelines that result in output files
|
|
3
|
+
which, when opened in dclab, can access features stored in the input file
|
|
4
|
+
(without having to write those features to the output file).
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import abc
|
|
9
|
+
import numbers
|
|
10
|
+
import threading
|
|
11
|
+
from typing import Dict, List, Literal
|
|
12
|
+
import uuid
|
|
13
|
+
import warnings
|
|
14
|
+
import weakref
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
|
|
18
|
+
from ..util import copy_if_needed
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class BasinFeatureMissingWarning(UserWarning):
    """Used when a basin feature is defined but not stored"""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CyclicBasinDependencyFoundWarning(UserWarning):
    """Used when a basin is defined in one of its sub-basins"""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class BasinmapFeatureMissingError(KeyError):
    """Used when one of the `basinmap` features is not defined"""
    pass
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class BasinNotAvailableError(BaseException):
    """Used to identify situations where the basin data is not available

    NOTE(review): this deliberately derives from ``BaseException`` rather
    than ``Exception`` — presumably so that broad ``except Exception``
    handlers elsewhere do not silently swallow it; confirm before changing
    the base class.
    """
    pass
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class BasinAvailabilityChecker(threading.Thread):
    """Background daemon thread that probes a basin's availability

    Instantiating the thread only stores a reference to the basin;
    the actual availability probe (`basin.is_available()`) happens
    when the thread is started.
    """

    def __init__(self, basin, *args, **kwargs):
        # Daemon thread: it must never keep the interpreter alive just
        # because a basin is slow or unreachable.
        super().__init__(*args, daemon=True, **kwargs)
        self.basin = basin

    def run(self):
        # The result is cached by the basin itself; we only trigger the check.
        self.basin.is_available()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class Basin(abc.ABC):
    """A basin represents data from an external source

    The external data must be a valid RT-DC dataset, subclasses
    should ensure that the corresponding API is available.
    """
    # NOTE(review): registry dict; not referenced within this class —
    # presumably populated/consumed by the format submodules; confirm.
    id_getters = {}

    def __init__(self,
                 location: str,
                 name: str = None,
                 description: str = None,
                 features: List[str] = None,
                 measurement_identifier: str = None,
                 mapping: Literal["same",
                                  "basinmap0",
                                  "basinmap1",
                                  "basinmap2",
                                  "basinmap3",
                                  "basinmap4",
                                  "basinmap5",
                                  "basinmap6",
                                  "basinmap7",
                                  "basinmap8",
                                  "basinmap9",
                                  ] = "same",
                 mapping_referrer: Dict = None,
                 ignored_basins: List[str] = None,
                 key: str = None,
                 **kwargs):
        """
        Parameters
        ----------
        location: str
            Location of the basin, this can be a path or a URL, depending
            on the implementation of the subclass
        name: str
            Human-readable name of the basin
        description: str
            Lengthy description of the basin
        features: list of str
            List of features this basin provides; This list is enforced,
            even if the basin actually contains more features.
        measurement_identifier: str
            A measurement identifier against which to check the basin.
            If this is set to None (default), there is no certainty
            that the downstream dataset is from the same measurement.
        mapping: str
            Which type of mapping to use. This can be either "same"
            when the event list of the basin is identical to that
            of the dataset defining the basin, or one of the "basinmap"
            features (e.g. "basinmap1") in cases where the dataset consists
            of a subset of the events of the basin dataset. In the latter
            case, the feature defined by `mapping` must be present in the
            dataset and consist of integer-valued indices (starting at 0)
            for the basin dataset.
        mapping_referrer: dict-like
            Dict-like object from which "basinmap" features can be obtained
            in situations where `mapping != "same"`. This can be a simple
            dictionary of numpy arrays or e.g. an instance of
            :class:`.RTDCBase`.
        ignored_basins: list of str
            List of basins to ignore in subsequent basin instantiations
        key: str
            Unique key to identify this basin; normally computed from
            a JSON dump of the basin definition. A random string is used
            if None is specified.
        kwargs:
            Additional keyword arguments passed to the `load_dataset`
            method of the `Basin` subclass.

        .. versionchanged: 0.58.0

            Added the `mapping` keyword argument to support basins
            with a superset of events.
        """
        #: location of the basin (e.g. path or URL)
        self.location = location
        #: user-defined name of the basin
        self.name = name
        #: lengthy description of the basin
        self.description = description
        # defining key of the basin; random fallback keeps keys unique
        self.key = key or str(uuid.uuid4())
        # features this basin provides (lazily resolved in `features`)
        self._features = features
        #: measurement identifier of the referencing dataset
        self.measurement_identifier = measurement_identifier
        # cached result of the identifier check in `verify_basin`
        self._measurement_identifier_verified = False
        #: ignored basins
        self.ignored_basins = ignored_basins or []
        #: additional keyword arguments passed to the basin
        self.kwargs = kwargs
        #: Event mapping strategy. If this is "same", it means that the
        #: referring dataset and the basin dataset have identical event
        #: indices. If `mapping` is e.g. `basinmap1` then the mapping of the
        #: indices from the basin to the referring dataset is defined in
        #: `self.basinmap` (copied during initialization of this class from
        #: the array in the key `basinmap1` from the dict-like object
        #: `mapping_referrer`).
        self.mapping = mapping or "same"
        self._basinmap = None  # see `basinmap` property
        # Create a weakref to the original referrer: If it is an instance
        # of RTDCBase, then garbage collection can clean up properly and
        # the basin instance has no reason to exist without the referrer.
        if self.mapping != "same":
            self._basinmap_referrer = weakref.ref(mapping_referrer)
        else:
            self._basinmap_referrer = None
        # lazily-opened dataset instance (see `ds` property)
        self._ds = None
        # perform availability check in separate thread
        self._av_check_lock = threading.Lock()
        self._av_check = BasinAvailabilityChecker(self)
        self._av_check.start()

    def __repr__(self):
        options = [
            self.name,
            f"mapped {self.mapping}" if self.mapping != "same" else "",
            f"features {self._features}" if self.features else "full-featured",
            f"location {self.location}",
        ]
        # drop empty entries (e.g. unnamed basin, "same" mapping)
        opt_str = ", ".join([o for o in options if o])

        return f"<{self.__class__.__name__} ({opt_str}) at {hex(id(self))}>"

    def _assert_measurement_identifier(self):
        """Make sure the basin matches the measurement identifier

        Raises
        ------
        KeyError
            If the identifier check in :func:`verify_basin` fails.
        """
        if not self.verify_basin(run_identifier=True):
            raise KeyError(f"Measurement identifier of basin {self.ds} "
                           f"({self.get_measurement_identifier()}) does "
                           f"not match {self.measurement_identifier}!")

    @property
    def basinmap(self):
        """Contains the indexing array in case of a mapped basin"""
        if self._basinmap is None:
            if self.mapping != "same":
                try:
                    # RecursionError may occur when basins reference each
                    # other cyclically while resolving the feature.
                    basinmap = self._basinmap_referrer()[self.mapping]
                except (KeyError, RecursionError):
                    raise BasinmapFeatureMissingError(
                        f"Could not find the feature '{self.mapping}' in the "
                        f"dataset or any of its basins. This suggests that "
                        f"this feature was never saved anywhere. Please check "
                        f"the input files.")
                # `basinmap` is an integer array that maps the events from the
                # basin to the events of the referring dataset. Copy so that
                # we do not keep a live view into the referrer's data.
                self._basinmap = np.array(basinmap,
                                          dtype=np.uint64,
                                          copy=True)
            else:
                self._basinmap = None
        return self._basinmap

    @property
    @abc.abstractmethod
    def basin_format(self):
        """Basin format (:class:`.RTDCBase` subclass), e.g. "hdf5" or "s3"
        """
        # to be implemented in subclasses

    @property
    @abc.abstractmethod
    def basin_type(self):
        """Storage type to use (e.g. "file" or "remote")"""
        # to be implemented in subclasses

    @property
    def ds(self):
        """The :class:`.RTDCBase` instance represented by the basin

        Opened lazily on first access; raises
        :class:`BasinNotAvailableError` if the basin cannot be reached.
        """
        if self._ds is None:
            if not self.is_available():
                raise BasinNotAvailableError(f"Basin {self} is not available!")
            self._ds = self.load_dataset(self.location, **self.kwargs)
            # propagate the ignore-list to avoid cyclic basin resolution
            self._ds.ignore_basins(self.ignored_basins)
        return self._ds

    @property
    def features(self):
        """Features made available by the basin

        .. versionchanged: 0.56.0

            Return nested basin features
        """
        if self._features is None:
            if self.is_available():
                # If features are not specified already, either by previous
                # call to this method or during initialization from basin
                # definition, then make the innate and *all* the basin
                # features available.
                self._features = sorted(set(self.ds.features_innate
                                            + self.ds.features_basin))
            else:
                self._features = []
        return self._features

    def as_dict(self):
        """Return basin kwargs for :func:`RTDCWriter.store_basin`

        Note that each subclass of :class:`.RTDCBase` has its own
        implementation of :func:`.RTDCBase.basins_get_dicts` which
        returns a list of basin dictionaries that are used to
        instantiate the basins in :func:`RTDCBase.basins_enable`.
        This method here is only intended for usage with
        :func:`RTDCWriter.store_basin`.
        """
        return {
            "basin_name": self.name,
            "basin_type": self.basin_type,
            "basin_format": self.basin_format,
            "basin_locs": [self.location],
            "basin_descr": self.description,
            "basin_feats": self.features,
            "basin_map": self.basinmap,
        }

    def close(self):
        """Close any open file handles or connections"""
        if self._ds is not None:
            self._ds.close()
        # give the availability checker thread a short grace period
        self._av_check.join(0.5)

    def get_feature_data(self, feat):
        """Return an object representing feature data of the basin

        Raises
        ------
        KeyError
            If the measurement identifier does not verify.
        """
        self._assert_measurement_identifier()
        return self.ds[feat]

    def get_measurement_identifier(self):
        """Return the identifier of the basin dataset"""
        return self.ds.get_measurement_identifier()

    @abc.abstractmethod
    def is_available(self):
        """Return True if the basin is available"""

    @abc.abstractmethod
    def _load_dataset(self, location, **kwargs):
        """Subclasses should return an instance of :class:`.RTDCBase`"""

    def load_dataset(self, location, **kwargs):
        """Return an instance of :class:`.RTDCBase` for this basin

        If the basin mapping (`self.mapping`) is not the same as
        the referencing dataset, the dataset is wrapped in a
        :class:`BasinProxy` that maps event indices via `self.basinmap`.
        """
        ds = self._load_dataset(location, **kwargs)
        if self.mapping != "same":
            # The array `self.basinmap` may contain duplicate elements,
            # which is why we cannot use hierarchy children to access the
            # data (sometimes the data must be blown-up rather than gated).
            ds_bn = BasinProxy(ds=ds, basinmap=self.basinmap)
        else:
            ds_bn = ds
        return ds_bn

    def verify_basin(self, run_identifier=True, availability=True):
        """Check whether the basin is usable

        Parameters
        ----------
        run_identifier: bool
            Check that the basin's measurement identifier matches
            `self.measurement_identifier` (exact match for "same"
            mapping, prefix match otherwise). The result is cached.
        availability: bool
            Deprecated; when False, skip the availability check.

        Returns
        -------
        bool
            True if all requested checks pass.
        """
        if not availability:
            warnings.warn("The keyword argument 'availability' is "
                          "deprecated, because it can lead to long waiting "
                          "times with many unavailable basins.",
                          DeprecationWarning)
        if availability:
            check_avail = self.is_available()
        else:
            check_avail = True

        # Only check for run identifier if requested and if the availability
        # check did not fail.
        if run_identifier and check_avail:
            if not self._measurement_identifier_verified:
                if self.measurement_identifier is None:
                    # No measurement identifier was presented by the
                    # referencing dataset. Don't perform any checks.
                    self._measurement_identifier_verified = True
                else:
                    if self.mapping == "same":
                        # When we have identical mapping, then the measurement
                        # identifier has to match exactly.
                        verifier = str.__eq__
                    else:
                        # When we have non-identical mapping (e.g. exported
                        # data), then the measurement identifier has to
                        # partially match.
                        verifier = str.startswith
                    self._measurement_identifier_verified = verifier(
                        self.measurement_identifier,
                        self.get_measurement_identifier()
                    )
            check_rid = self._measurement_identifier_verified
        else:
            check_rid = True

        return check_rid and check_avail
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
class BasinProxy:
    def __init__(self, ds, basinmap):
        """Proxy for accessing data in basin datasets

        A basin proxy exposes the data of an :class:`.RTDCBase` whose
        event indices do not coincide with those of the downstream
        dataset, translating access through an index array.

        It covers two scenarios:

        1. Subset indexing: every downstream event maps to exactly one
           basin event (also achievable with hierarchy children,
           :class:`RTDCHierarchy`).
        2. Blown indexing: several downstream events may map to the
           same basin event, i.e. the basin holds fewer events than the
           downstream dataset (e.g. a raw image series where multiple
           events were detected in a single frame).

        Parameters
        ----------
        ds: RTDCBase
            the basin dataset
        basinmap: np.ndarray
            1D integer indexing array that maps the events of the basin
            dataset to the downstream dataset
        """
        self.ds = ds
        self.basinmap = basinmap
        # expose the wrapped dataset's basin-dict accessor directly
        self.basins_get_dicts = ds.basins_get_dicts
        # cache of already-wrapped feature proxies
        self._features = {}

    def __contains__(self, item):
        return item in self.ds

    def __getattr__(self, item):
        # Whitelisted attributes are forwarded to the wrapped dataset;
        # everything else is an explicit AttributeError so that callers
        # cannot silently rely on unproxied behavior.
        if item in (
            "basins",
            "close",
            "features",
            "features_ancillary",
            "features_basin",
            "features_innate",
            "features_loaded",
            "features_local",
            "features_scalar",
            "get_measurement_identifier",
            "ignore_basins",
        ):
            return getattr(self.ds, item)
        raise AttributeError(
            f"BasinProxy does not implement {item}")

    def __getitem__(self, feat):
        # Wrap each feature once and reuse the proxy afterwards.
        try:
            return self._features[feat]
        except KeyError:
            proxy = BasinProxyFeature(feat_obj=self.ds[feat],
                                      basinmap=self.basinmap)
            self._features[feat] = proxy
            return proxy

    def __len__(self):
        # length of the *downstream* dataset, i.e. the mapped view
        return len(self.basinmap)
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
class BasinProxyFeature(np.lib.mixins.NDArrayOperatorsMixin):
    def __init__(self, feat_obj, basinmap):
        """Wrap around a feature object, mapping it upon data access

        Parameters
        ----------
        feat_obj:
            Feature object of the basin dataset; must support `len`,
            `.shape`, `.dtype`, and integer/slice indexing.
        basinmap: np.ndarray
            1D integer indexing array mapping basin events to the
            downstream dataset (may contain duplicates).
        """
        self.feat_obj = feat_obj
        self.basinmap = basinmap
        # cache for the fully-mapped data of scalar features
        self._cache = None
        # scalar features are 1D (one value per event)
        self.is_scalar = bool(len(self.feat_obj.shape) == 1)

    def __array__(self, dtype=None, copy=copy_if_needed, *args, **kwargs):
        """Return the mapped feature data as a numpy array

        Scalar features are read once and cached. Non-scalar features
        (e.g. image stacks) are assembled event-by-event, because a full
        fancy-indexed read could be very memory-hungry.
        """
        if self.is_scalar:
            # BUGFIX: previously the cached array was only returned on the
            # very first call; any subsequent call took the slow per-event
            # loop because the condition `self._cache is None and
            # self.is_scalar` evaluated to False once the cache was set.
            # Also honor the `dtype` argument on the cached path (it used
            # to be silently ignored).
            if self._cache is None:
                self._cache = self.feat_obj[:][self.basinmap]
            return np.array(self._cache, dtype=dtype, copy=copy)
        else:
            # This is dangerous territory in terms of memory usage
            out_arr = np.empty((len(self.basinmap),) + self.feat_obj.shape[1:],
                               dtype=dtype or self.feat_obj.dtype,
                               *args, **kwargs)
            for ii, idx in enumerate(self.basinmap):
                out_arr[ii] = self.feat_obj[idx]
            return out_arr

    def __getattr__(self, item):
        # Forward array metadata to the wrapped feature object; anything
        # else fails loudly.
        if item in [
            "dtype",
            "shape",
            "size",
        ]:
            return getattr(self.feat_obj, item)
        else:
            raise AttributeError(
                f"BasinProxyFeature does not implement {item}")

    def __getitem__(self, index):
        """Return mapped feature data for `index`

        Integer indices are translated directly (cheap); non-scalar
        features are assembled per-event; scalar features go through
        the cached full array.
        """
        if self._cache is None and isinstance(index, numbers.Integral):
            # single index, cheap operation
            return self.feat_obj[self.basinmap[index]]
        elif not self.is_scalar:
            # image, mask, etc
            if isinstance(index, slice) and index == slice(None):
                indices = self.basinmap
            else:
                indices = self.basinmap[index]
            out_arr = np.empty((len(indices),) + self.feat_obj.shape[1:],
                               dtype=self.feat_obj.dtype)
            for ii, idx in enumerate(indices):
                out_arr[ii] = self.feat_obj[idx]
            return out_arr
        else:
            # sets the cache if not already set
            return self.__array__()[index]

    def __len__(self):
        # length of the mapped (downstream) view
        return len(self.basinmap)
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
def basin_priority_sorted_key(bdict: Dict):
    """Yield a sorting value for a given basin that can be used with `sorted`

    Basins are normally stored in random order in a dataset. This method
    brings them into correct order, prioritizing:

    - type: "file" over "remote"
    - format: "HTTP" over "S3" over "dcor"
    - mapping: "same" over anything else
    """
    # Rank letters sort lexicographically; unknown values rank last ("z").
    type_ranks = {
        "internal": "a",
        "file": "b",
        "remote": "c",
    }
    format_ranks = {
        "h5dataset": "a",
        "hdf5": "b",
        "http": "c",
        "s3": "d",
        "dcor": "e",
    }

    srt_type = type_ranks.get(bdict.get("type"), "z")
    srt_format = format_ranks.get(bdict.get("format"), "z")

    mapping = bdict.get("mapping", "same")  # old dicts don't have "mapping"
    srt_map = "a" if mapping == "same" else mapping

    return srt_type + srt_format + srt_map
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
class InternalH5DatasetBasin(Basin):
    """Basin backed by an HDF5 group *inside* the referring file

    The basin data live under `self.location` (an HDF5 group path) in the
    referrer's own `h5file`, so "availability" reduces to whether any of
    the requested features exist in that group.
    """
    basin_format = "h5dataset"
    basin_type = "internal"

    def __init__(self, *args, **kwargs):
        super(InternalH5DatasetBasin, self).__init__(*args, **kwargs)
        if self.mapping == "same":
            raise ValueError(
                "'internal' basins must be instantiated with `mapping`. "
                "If you are not doing that, then you probably don't need "
                "them.")
        if self._features is None:
            raise ValueError("You must specify features when defining "
                             "internal basins.")
        # Redefine the features if necessary
        h5root = self._basinmap_referrer().h5file
        available_features = []
        for feat in self._features:
            if self.location in h5root and feat in h5root[self.location]:
                available_features.append(feat)
            else:
                warnings.warn(
                    f"Feature '{feat}' is defined as an internal basin, "
                    f"but it cannot be found in '{self.location}'.",
                    BasinFeatureMissingWarning)
        # Mutate the existing list in-place (clear + extend) so any other
        # references to `self._features` observe the filtered feature set.
        self._features.clear()
        self._features += available_features

    def _load_dataset(self, location, **kwargs):
        """Build an :class:`RTDC_Dict` from the internal HDF5 group"""
        # local import to avoid a circular module dependency
        from .fmt_dict import RTDC_Dict
        # get the h5file object
        h5root = self._basinmap_referrer().h5file
        ds_dict = {}
        for feat in self.features:
            ds_dict[feat] = h5root[self.location][feat]
        return RTDC_Dict(ds_dict)

    def is_available(self):
        # available iff at least one requested feature survived the
        # filtering in __init__
        return bool(self._features)

    def verify_basin(self, *args, **kwargs):
        """It's not necessary to verify internal basins"""
        return True
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
def get_basin_classes():
    """Return a mapping of basin format identifiers to Basin subclasses

    Only *direct* subclasses of :class:`Basin` that define a
    ``basin_format`` attribute are included.
    """
    return {
        b_cls.basin_format: b_cls
        for b_cls in Basin.__subclasses__()
        if hasattr(b_cls, "basin_format")
    }
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""
|
|
2
|
+
.. versionadded:: 0.33.0
|
|
3
|
+
"""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
from ..definitions import feat_logic
|
|
11
|
+
|
|
12
|
+
from .core import RTDCBase
|
|
13
|
+
from .fmt_hierarchy import RTDC_Hierarchy, map_indices_child2root
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
_registered_temporary_features = []
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def deregister_all():
    """Deregister every currently registered temporary feature"""
    # Iterate over a snapshot, because deregistration mutates the
    # module-level registry list.
    for feature_name in tuple(_registered_temporary_features):
        deregister_temporary_feature(feature_name)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def deregister_temporary_feature(feature: str):
    """Convenience function for deregistering a temporary feature

    This method is mostly used during testing. It does not
    remove the actual feature data from any dataset; the data
    will stay in memory but is not accessible anymore through
    the public methods of the :class:`RTDCBase` user interface.
    """
    # EAFP: removing an unregistered feature is a silent no-op.
    try:
        _registered_temporary_features.remove(feature)
    except ValueError:
        return
    feat_logic.feature_deregister(feature)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def register_temporary_feature(feature: str,
                               label: Optional[str] = None,
                               is_scalar: bool = True):
    """Register a new temporary feature

    Temporary features are custom features that can be defined ad hoc
    by the user. Temporary features are helpful when the integral
    features are not enough, e.g. for prototyping, testing, or
    collating with other data. Temporary features allow you to
    leverage the full functionality of :class:`RTDCBase` with
    your custom features (no need to go for a custom `pandas.Dataframe`).

    Parameters
    ----------
    feature: str
        Feature name; allowed characters are lower-case letters,
        digits, and underscores
    label: str
        Feature label used e.g. for plotting
    is_scalar: bool
        Whether or not the feature is a scalar feature

    Raises
    ------
    ValueError
        Propagated from `feat_logic.feature_register` if the name is
        invalid or already taken (NOTE(review): presumed — confirm in
        `feat_logic`).
    """
    # Register first; the name is only tracked locally if registration
    # succeeded (feature_register raising leaves the registry untouched).
    feat_logic.feature_register(feature, label, is_scalar)
    _registered_temporary_features.append(feature)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def set_temporary_feature(rtdc_ds: RTDCBase,
                          feature: str,
                          data: np.ndarray):
    """Set temporary feature data for a dataset

    Parameters
    ----------
    rtdc_ds: dclab.RTDCBase
        Dataset for which to set the feature. Note that the
        length of the feature `data` must match the number of events
        in `rtdc_ds`. If the dataset is a hierarchy child, the data will also
        be set in the parent dataset, but only for those events that are part
        of the child. For all events in the parent dataset that are not part
        of the child dataset, the temporary feature is set to np.nan.
    feature: str
        Feature name
    data: np.ndarray
        The data

    Raises
    ------
    ValueError
        If `feature` has not been registered via
        :func:`register_temporary_feature`, or if `len(data)` does not
        match the number of events in `rtdc_ds`.
    """
    if not feat_logic.feature_exists(feature):
        raise ValueError(
            f"Temporary feature '{feature}' has not been registered!")
    if len(data) != len(rtdc_ds):
        raise ValueError(f"The temporary feature {feature} must have same "
                         f"length as the dataset. Expected length "
                         f"{len(rtdc_ds)}, got length {len(data)}!")
    if isinstance(rtdc_ds, RTDC_Hierarchy):
        # For hierarchy children, write the data into the root parent,
        # filling events not present in the child with NaN, then recurse
        # on the root (which takes the non-hierarchy branch below).
        root_ids = map_indices_child2root(rtdc_ds, np.arange(len(rtdc_ds)))
        root_parent = rtdc_ds.get_root_parent()
        root_feat_data = np.empty((len(root_parent)))
        root_feat_data[:] = np.nan
        root_feat_data[root_ids] = data
        set_temporary_feature(root_parent, feature, root_feat_data)
        # refresh the child so it picks up the new parent feature
        rtdc_ds.rejuvenate()
    else:
        feat_logic.check_feature_shape(feature, data)
        # Store a read-only view so callers cannot mutate the dataset's
        # feature data through their original array reference.
        data_ro = data.view()
        data_ro.setflags(write=False)
        rtdc_ds._usertemp[feature] = data_ro
|