dclab 0.67.0__cp314-cp314-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dclab might be problematic; see the release notes on the registry page for more details.
- dclab/__init__.py +41 -0
- dclab/_version.py +34 -0
- dclab/cached.py +97 -0
- dclab/cli/__init__.py +10 -0
- dclab/cli/common.py +237 -0
- dclab/cli/task_compress.py +126 -0
- dclab/cli/task_condense.py +223 -0
- dclab/cli/task_join.py +229 -0
- dclab/cli/task_repack.py +98 -0
- dclab/cli/task_split.py +154 -0
- dclab/cli/task_tdms2rtdc.py +186 -0
- dclab/cli/task_verify_dataset.py +75 -0
- dclab/definitions/__init__.py +79 -0
- dclab/definitions/feat_const.py +202 -0
- dclab/definitions/feat_logic.py +182 -0
- dclab/definitions/meta_const.py +252 -0
- dclab/definitions/meta_logic.py +111 -0
- dclab/definitions/meta_parse.py +94 -0
- dclab/downsampling.cpython-314-darwin.so +0 -0
- dclab/downsampling.pyx +230 -0
- dclab/external/__init__.py +4 -0
- dclab/external/packaging/LICENSE +3 -0
- dclab/external/packaging/LICENSE.APACHE +177 -0
- dclab/external/packaging/LICENSE.BSD +23 -0
- dclab/external/packaging/__init__.py +6 -0
- dclab/external/packaging/_structures.py +61 -0
- dclab/external/packaging/version.py +505 -0
- dclab/external/skimage/LICENSE +28 -0
- dclab/external/skimage/__init__.py +2 -0
- dclab/external/skimage/_find_contours.py +216 -0
- dclab/external/skimage/_find_contours_cy.cpython-314-darwin.so +0 -0
- dclab/external/skimage/_find_contours_cy.pyx +188 -0
- dclab/external/skimage/_pnpoly.cpython-314-darwin.so +0 -0
- dclab/external/skimage/_pnpoly.pyx +99 -0
- dclab/external/skimage/_shared/__init__.py +1 -0
- dclab/external/skimage/_shared/geometry.cpython-314-darwin.so +0 -0
- dclab/external/skimage/_shared/geometry.pxd +6 -0
- dclab/external/skimage/_shared/geometry.pyx +55 -0
- dclab/external/skimage/measure.py +7 -0
- dclab/external/skimage/pnpoly.py +53 -0
- dclab/external/statsmodels/LICENSE +35 -0
- dclab/external/statsmodels/__init__.py +6 -0
- dclab/external/statsmodels/nonparametric/__init__.py +1 -0
- dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
- dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
- dclab/external/statsmodels/nonparametric/kernels.py +36 -0
- dclab/features/__init__.py +9 -0
- dclab/features/bright.py +81 -0
- dclab/features/bright_bc.py +93 -0
- dclab/features/bright_perc.py +63 -0
- dclab/features/contour.py +161 -0
- dclab/features/emodulus/__init__.py +339 -0
- dclab/features/emodulus/load.py +252 -0
- dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
- dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
- dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
- dclab/features/emodulus/pxcorr.py +135 -0
- dclab/features/emodulus/scale_linear.py +247 -0
- dclab/features/emodulus/viscosity.py +260 -0
- dclab/features/fl_crosstalk.py +95 -0
- dclab/features/inert_ratio.py +377 -0
- dclab/features/volume.py +242 -0
- dclab/http_utils.py +322 -0
- dclab/isoelastics/__init__.py +468 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
- dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
- dclab/kde/__init__.py +1 -0
- dclab/kde/base.py +459 -0
- dclab/kde/contours.py +222 -0
- dclab/kde/methods.py +313 -0
- dclab/kde_contours.py +10 -0
- dclab/kde_methods.py +11 -0
- dclab/lme4/__init__.py +5 -0
- dclab/lme4/lme4_template.R +94 -0
- dclab/lme4/rsetup.py +204 -0
- dclab/lme4/wrapr.py +386 -0
- dclab/polygon_filter.py +398 -0
- dclab/rtdc_dataset/__init__.py +15 -0
- dclab/rtdc_dataset/check.py +902 -0
- dclab/rtdc_dataset/config.py +533 -0
- dclab/rtdc_dataset/copier.py +353 -0
- dclab/rtdc_dataset/core.py +896 -0
- dclab/rtdc_dataset/export.py +867 -0
- dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
- dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
- dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
- dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
- dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
- dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
- dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
- dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
- dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
- dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
- dclab/rtdc_dataset/feat_basin.py +762 -0
- dclab/rtdc_dataset/feat_temp.py +102 -0
- dclab/rtdc_dataset/filter.py +263 -0
- dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
- dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
- dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
- dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
- dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
- dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
- dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
- dclab/rtdc_dataset/fmt_dict.py +103 -0
- dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
- dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
- dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
- dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
- dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
- dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
- dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
- dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
- dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
- dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
- dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
- dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
- dclab/rtdc_dataset/fmt_http.py +102 -0
- dclab/rtdc_dataset/fmt_s3.py +354 -0
- dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
- dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
- dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
- dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
- dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
- dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
- dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
- dclab/rtdc_dataset/load.py +77 -0
- dclab/rtdc_dataset/meta_table.py +25 -0
- dclab/rtdc_dataset/writer.py +1019 -0
- dclab/statistics.py +226 -0
- dclab/util.py +176 -0
- dclab/warn.py +15 -0
- dclab-0.67.0.dist-info/METADATA +153 -0
- dclab-0.67.0.dist-info/RECORD +142 -0
- dclab-0.67.0.dist-info/WHEEL +6 -0
- dclab-0.67.0.dist-info/entry_points.txt +8 -0
- dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
- dclab-0.67.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,762 @@
|
|
|
1
|
+
"""
|
|
2
|
+
With basins, you can create analysis pipelines that result in output files
|
|
3
|
+
which, when opened in dclab, can access features stored in the input file
|
|
4
|
+
(without having to write those features to the output file).
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import abc
|
|
9
|
+
import logging
|
|
10
|
+
import numbers
|
|
11
|
+
import threading
|
|
12
|
+
from typing import Callable, Dict, List, Literal, Union
|
|
13
|
+
import uuid
|
|
14
|
+
import warnings
|
|
15
|
+
import weakref
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from ..util import copy_if_needed
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class BasinFeatureMissingWarning(UserWarning):
    """Used when a basin feature is defined but not stored"""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CyclicBasinDependencyFoundWarning(UserWarning):
    """Used when a basin is defined in one of its sub-basins
    (i.e. a cyclic basin dependency was detected)
    """
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class IgnoringPerishableBasinTTL(UserWarning):
    """Used when refreshing a basin does not support TTL

    Issued e.g. by :func:`PerishableRecord.refresh` when an `extend_by`
    (time-to-live) argument is given but the refresh function of the
    basin source does not accept it.
    """
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class BasinmapFeatureMissingError(KeyError):
    """Used when one of the `basinmap` features is not defined

    Raised by the :func:`Basin.basinmap` property when the mapping
    feature (e.g. "basinmap1") cannot be found in the referring
    dataset or any of its basins.
    """
    pass
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class BasinNotAvailableError(BaseException):
    """Used to identify situations where the basin data is not available

    NOTE(review): this derives from `BaseException` rather than
    `Exception`, presumably so that broad ``except Exception`` handlers
    do not silently swallow it — confirm this is intended before
    changing the base class.
    """
    pass
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class BasinAvailabilityChecker(threading.Thread):
    """Background daemon thread probing a basin's availability.

    Calling the basin's ``is_available`` method once in a separate
    thread warms up the availability information without blocking
    the thread that created the basin.
    """

    def __init__(self, basin, *args, **kwargs):
        # Daemon thread, so it never keeps the interpreter alive on exit.
        super().__init__(*args, daemon=True, **kwargs)
        # basin whose availability is probed in `run`
        self.basin = basin

    def run(self):
        """Trigger the (possibly slow) availability check once."""
        self.basin.is_available()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class PerishableRecord:
    """Bookkeeping for basins that can expire ("perish")

    Perishable basins are basins that may discontinue to work after
    e.g. a specific amount of time (e.g. presigned S3 URLs). With the
    `PerishableRecord`, these basins may be "refreshed" (made
    available again).
    """
    def __init__(self,
                 basin,
                 expiration_func: Callable = None,
                 expiration_kwargs: Dict = None,
                 refresh_func: Callable = None,
                 refresh_kwargs: Dict = None,
                 ):
        """
        Parameters
        ----------
        basin: Basin
            Instance of the perishable basin
        expiration_func: callable
            A function that determines whether the basin has perished.
            It must accept `basin` as the first argument and should be
            cheap to call, because it runs on every feature access.
            If you implement this in the time domain, base it on
            `time.time()` (an absolute measure); `time.monotonic()`
            does not advance while the system sleeps. If a remote
            machine dictates the expiration time, it should also
            transmit the creation time to account for clock offsets.
        expiration_kwargs: dict
            Additional kwargs for `expiration_func`.
        refresh_func: callable
            The function used to refresh the `basin`. It must accept
            `basin` as the first argument.
        refresh_kwargs: dict
            Additional kwargs for `refresh_func`
        """
        # Store only a weak proxy; the record must not keep the
        # basin object alive.
        if isinstance(basin, weakref.ProxyType):
            self.basin = basin
        else:
            self.basin = weakref.proxy(basin)
        self.expiration_func = expiration_func
        self.expiration_kwargs = expiration_kwargs or {}
        self.refresh_func = refresh_func
        self.refresh_kwargs = refresh_kwargs or {}

    def __repr__(self):
        if self.perished():
            state = "perished"
        else:
            state = "valid"
        return f"<PerishableRecord ({state}) at {hex(id(self))}>"

    def perished(self) -> Union[bool, None]:
        """Determine whether the basin has perished

        Returns
        -------
        state: bool or None
            True means the basin has perished, False means the basin
            has not perished, and `None` means we don't know
        """
        if self.expiration_func is None:
            return None
        return self.expiration_func(self.basin, **self.expiration_kwargs)

    def refresh(self, extend_by: float = None) -> None:
        """Extend the lifetime of the associated perishable basin

        Parameters
        ----------
        extend_by: float
            Custom argument for extending the life of the basin.
            Normally, this would be a lifetime.
        """
        # Perishable basin without any means of refreshing it.
        if self.refresh_func is None:
            logger.error(f"Cannot refresh basin '{self.basin}'")
            return

        if extend_by and "extend_by" not in self.refresh_kwargs:
            # The basin source did not declare TTL support.
            warnings.warn(
                "Parameter 'extend_by' ignored, because the basin "
                "source does not support it",
                IgnoringPerishableBasinTTL)
            extend_by = None

        rkw = dict(self.refresh_kwargs)
        if extend_by is not None:
            rkw["extend_by"] = extend_by

        self.refresh_func(self.basin, **rkw)
        logger.info(f"Refreshed basin '{self.basin}'")

        # If everything went well, reset the current dataset of the basin
        # so it gets re-opened on next access.
        if self.basin._ds is not None:
            self.basin._ds.close()
        self.basin._ds = None
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class Basin(abc.ABC):
    """A basin represents data from an external source

    The external data must be a valid RT-DC dataset, subclasses
    should ensure that the corresponding API is available.
    """
    # Registry mapping basin identifiers to getter callables; left empty
    # here — presumably populated elsewhere in the package (TODO confirm).
    id_getters = {}

    def __init__(self,
                 location: str,
                 name: str = None,
                 description: str = None,
                 features: List[str] = None,
                 referrer_identifier: str = None,
                 basin_identifier: str = None,
                 mapping: Literal["same",
                                  "basinmap0",
                                  "basinmap1",
                                  "basinmap2",
                                  "basinmap3",
                                  "basinmap4",
                                  "basinmap5",
                                  "basinmap6",
                                  "basinmap7",
                                  "basinmap8",
                                  "basinmap9",
                                  ] = "same",
                 mapping_referrer: Dict = None,
                 ignored_basins: List[str] = None,
                 key: str = None,
                 perishable=False,
                 **kwargs):
        """

        Parameters
        ----------
        location: str
            Location of the basin, this can be a path or a URL, depending
            on the implementation of the subclass
        name: str
            Human-readable name of the basin
        description: str
            Lengthy description of the basin
        features: list of str
            List of features this basin provides; This list is enforced,
            even if the basin actually contains more features.
        referrer_identifier: str
            A measurement identifier against which to check the basin.
            If the basin mapping is "same", then this must match the
            identifier of the basin exactly, otherwise it must start
            with the basin identifier (e.g. "basin-id_referrer-sub-id").
            If this is set to None (default), there is no certainty
            that the downstream dataset is from the same measurement.
        basin_identifier: str
            A measurement identifier that must match the basin exactly.
            In contrast to `referrer_identifier`, the basin identifier is
            the identifier of the basin file. If `basin_identifier` is
            specified, the identifier of the basin must be identical to it.
        mapping: str
            Which type of mapping to use. This can be either "same"
            when the event list of the basin is identical to that
            of the dataset defining the basin, or one of the "basinmap"
            features (e.g. "basinmap1") in cases where the dataset consists
            of a subset of the events of the basin dataset. In the latter
            case, the feature defined by `mapping` must be present in the
            dataset and consist of integer-valued indices (starting at 0)
            for the basin dataset.
        mapping_referrer: dict-like
            Dict-like object from which "basinmap" features can be obtained
            in situations where `mapping != "same"`. This can be a simple
            dictionary of numpy arrays or e.g. an instance of
            :class:`.RTDCBase`.
        ignored_basins: list of str
            List of basins to ignore in subsequent basin instantiations
        key: str
            Unique key to identify this basin; normally computed from
            a JSON dump of the basin definition. A random string is used
            if None is specified.
        perishable: bool or PerishableRecord
            If this is not False, then it must be a :class:`.PerishableRecord`
            that holds the information about the expiration time, and that
            comes with a method `refresh` to extend the lifetime of the basin.
        kwargs:
            Additional keyword arguments passed to the `load_dataset`
            method of the `Basin` subclass.

        .. versionchanged: 0.58.0

            Added the `mapping` keyword argument to support basins
            with a superset of events.
        """
        #: location of the basin (e.g. path or URL)
        self.location = location
        #: user-defined name of the basin
        self.name = name
        #: lengthy description of the basin
        self.description = description
        # perishable record
        if isinstance(perishable, bool) and perishable:
            # Create an empty perishable record
            perishable = PerishableRecord(self)
        self.perishable = perishable
        # define key of the basin
        self.key = key or str(uuid.uuid4())
        # features this basin provides
        self._features = features
        #: measurement identifier of the referencing dataset
        self.referrer_identifier = referrer_identifier
        # identifier that the basin itself must match exactly (optional)
        self.basin_identifier = basin_identifier or None
        # tri-state cache for `verify_basin`: None (not yet checked),
        # True (identifiers consistent), False (mismatch)
        self._identifiers_verification = None
        #: ignored basins
        self.ignored_basins = ignored_basins or []
        #: additional keyword arguments passed to the basin
        self.kwargs = kwargs
        #: Event mapping strategy. If this is "same", it means that the
        #: referring dataset and the basin dataset have identical event
        #: indices. If `mapping` is e.g. `basinmap1` then the mapping of the
        #: indices from the basin to the referring dataset is defined in
        #: `self.basinmap` (copied during initialization of this class from
        #: the array in the key `basinmap1` from the dict-like object
        #: `mapping_referrer`).
        self.mapping = mapping or "same"
        self._basinmap = None  # see `basinmap` property
        # Create a weakref to the original referrer: If it is an instance
        # of RTDCBase, then garbage collection can clean up properly and
        # the basin instance has no reason to exist without the referrer.
        if self.mapping != "same":
            self._basinmap_referrer = weakref.ref(mapping_referrer)
        else:
            self._basinmap_referrer = None
        # lazily-opened dataset instance (see the `ds` property)
        self._ds = None
        # perform availability check in separate thread
        self._av_check_lock = threading.Lock()
        self._av_check = BasinAvailabilityChecker(self)
        self._av_check.start()

    def __repr__(self):
        """Summarize name, mapping, feature count and location"""
        # `self.features` may fail (e.g. basin unavailable); repr() must
        # never raise, so fall back to "unknown".
        try:
            feature_info = len(self.features)
        except BaseException:
            feature_info = "unknown"
        options = [
            self.name,
            f"mapped {self.mapping}" if self.mapping != "same" else "",
            f"{feature_info} features",
            f"location {self.location}",
        ]
        opt_str = ", ".join([o for o in options if o])

        return f"<{self.__class__.__name__} ({opt_str}) at {hex(id(self))}>"

    def _assert_referrer_identifier(self):
        """Make sure the basin matches the measurement identifier

        Raises
        ------
        KeyError
            If :func:`verify_basin` reports an identifier mismatch.
        """
        if not self.verify_basin(run_identifier=True):
            raise KeyError(f"Measurement identifier of basin {self.ds} "
                           f"({self.get_measurement_identifier()}) does "
                           f"not match {self.referrer_identifier}!")

    @property
    def basinmap(self):
        """Contains the indexing array in case of a mapped basin"""
        if self._basinmap is None:
            if self.mapping != "same":
                try:
                    # Fetch the mapping feature from the weakly referenced
                    # referrer; this may recurse into its basins.
                    basinmap = self._basinmap_referrer()[self.mapping]
                except (KeyError, RecursionError):
                    raise BasinmapFeatureMissingError(
                        f"Could not find the feature '{self.mapping}' in the "
                        f"dataset or any of its basins. This suggests that "
                        f"this feature was never saved anywhere. Please check "
                        f"the input files.")
                #: `basinmap` is an integer array that maps the events from the
                #: basin to the events of the referring dataset.
                self._basinmap = np.array(basinmap,
                                          dtype=np.uint64,
                                          copy=True)
            else:
                self._basinmap = None
        return self._basinmap

    @property
    @abc.abstractmethod
    def basin_format(self):
        """Basin format (:class:`.RTDCBase` subclass), e.g. "hdf5" or "s3"
        """
        # to be implemented in subclasses

    @property
    @abc.abstractmethod
    def basin_type(self):
        """Storage type to use (e.g. "file" or "remote")"""
        # to be implemented in subclasses

    @property
    def ds(self):
        """The :class:`.RTDCBase` instance represented by the basin"""
        if self.perishable and self.perishable.perished():
            # We have perished. Ask the PerishableRecord to refresh this
            # basin so we can access it again.
            self.perishable.refresh()
        if self._ds is None:
            if not self.is_available():
                raise BasinNotAvailableError(f"Basin {self} is not available!")
            self._ds = self.load_dataset(self.location, **self.kwargs)
            self._ds.ignore_basins(self.ignored_basins)
        return self._ds

    @property
    def features(self):
        """Features made available by the basin

        .. versionchanged: 0.56.0

            Return nested basin features
        """
        # NOTE(review): when the basin is unavailable, an empty list is
        # cached here; a later availability change will not repopulate
        # it — confirm this is intended.
        if self._features is None:
            if self.is_available():
                # If features are not specified already, either by previous
                # call to this method or during initialization from basin
                # definition, then make the innate and *all* the basin
                # features available.
                self._features = sorted(set(self.ds.features_innate
                                            + self.ds.features_basin))
            else:
                self._features = []
        return self._features

    def as_dict(self):
        """Return basin kwargs for :func:`RTDCWriter.store_basin`

        Note that each subclass of :class:`.RTDCBase` has its own
        implementation of :func:`.RTDCBase.basins_get_dicts` which
        returns a list of basin dictionaries that are used to
        instantiate the basins in :func:`RTDCBase.basins_enable`.
        This method here is only intended for usage with
        :func:`RTDCWriter.store_basin`.
        """
        return {
            "basin_name": self.name,
            "basin_type": self.basin_type,
            "basin_format": self.basin_format,
            "basin_locs": [self.location],
            "basin_descr": self.description,
            "basin_feats": self.features,
            "basin_map": self.basinmap,
            "perishable": bool(self.perishable),
        }

    def close(self):
        """Close any open file handles or connections"""
        if self._ds is not None:
            self._ds.close()
        # wait briefly for the background availability check to finish
        self._av_check.join(0.5)

    def get_feature_data(self, feat):
        """Return an object representing feature data of the basin"""
        # raises KeyError if the measurement identifiers do not match
        self._assert_referrer_identifier()
        return self.ds[feat]

    def get_measurement_identifier(self):
        """Return the identifier of the basin dataset"""
        return self.ds.get_measurement_identifier()

    @abc.abstractmethod
    def is_available(self):
        """Return True if the basin is available"""

    @abc.abstractmethod
    def _load_dataset(self, location, **kwargs):
        """Subclasses should return an instance of :class:`.RTDCBase`"""

    def load_dataset(self, location, **kwargs):
        """Return an instance of :class:`.RTDCBase` for this basin

        If the basin mapping (`self.mapping`) is not the same as
        the referencing dataset, the returned dataset is wrapped in
        a :class:`BasinProxy` that maps basin event indices onto the
        referring dataset via `self.basinmap`.
        """
        ds = self._load_dataset(location, **kwargs)
        if self.mapping != "same":
            # The array `self.basinmap` may contain duplicate elements,
            # which is why we cannot use hierarchy children to access the
            # data (sometimes the data must be blown-up rather than gated).
            ds_bn = BasinProxy(ds=ds, basinmap=self.basinmap)
        else:
            ds_bn = ds
        return ds_bn

    def verify_basin(self, run_identifier=True, availability=True):
        """Check whether the basin can be used

        Parameters
        ----------
        run_identifier: bool
            Verify the measurement identifier of the basin against
            `self.basin_identifier` / `self.referrer_identifier`;
            the outcome is cached in `self._identifiers_verification`.
        availability: bool
            Check availability via :func:`is_available`;
            passing False is deprecated.

        Returns
        -------
        valid: bool
            True if all requested checks passed
        """
        if not availability:
            warnings.warn("The keyword argument 'availability' is "
                          "deprecated, because it can lead to long waiting "
                          "times with many unavailable basins.",
                          DeprecationWarning)
        if availability:
            check_avail = self.is_available()
        else:
            check_avail = True

        # Only check for run identifier if requested and if the availability
        # check did not fail.
        if run_identifier and check_avail:
            if self._identifiers_verification is None:
                # This is the measurement identifier of the basin.
                basin_identifier = self.get_measurement_identifier()

                # Perform a sanity check for the basin identifier.
                if (self.basin_identifier
                        and self.basin_identifier != basin_identifier):
                    # We should not proceed any further with this basin.
                    self._identifiers_verification = False
                    warnings.warn(
                        f"Basin identifier mismatch for {self}. Expected "
                        f"'{self.basin_identifier}', got '{basin_identifier}'")

                # NOTE(review): the False set above can be overwritten by
                # the referrer checks below (this `if` is not an `elif`) —
                # confirm whether a mismatch should short-circuit instead.
                if self.referrer_identifier is None:
                    # No measurement identifier was presented by the
                    # referencing dataset. We are in the dark.
                    # Don't perform any checks.
                    self._identifiers_verification = True
                else:
                    if basin_identifier is None:
                        # Again, we are in the dark, because the basin dataset
                        # does not have an identifier. This is an undesirable
                        # situation, but there is nothing we can do about it.
                        self._identifiers_verification = True
                    else:
                        if self.mapping == "same":
                            # When we have identical mapping, then the
                            # measurement identifier has to match exactly.
                            verifier = str.__eq__
                        else:
                            # When we have non-identical mapping (e.g. exported
                            # data), then the measurement identifier has to
                            # partially match.
                            verifier = str.startswith
                        self._identifiers_verification = verifier(
                            self.referrer_identifier, basin_identifier)

            check_rid = self._identifiers_verification
        else:
            check_rid = True

        return check_rid and check_avail
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
class BasinProxy:
    # attribute names transparently forwarded to the wrapped dataset
    _PASSTHROUGH = (
        "basins",
        "close",
        "features",
        "features_ancillary",
        "features_basin",
        "features_innate",
        "features_loaded",
        "features_local",
        "features_scalar",
        "get_measurement_identifier",
        "ignore_basins",
    )

    def __init__(self, ds, basinmap):
        """Proxy for accessing data in basin datasets

        A basin proxy gives access to the data of an :class:`.RTDCBase`
        that is *mapped*: the event indices defined for the basin do not
        coincide with the indices in the downstream dataset.

        Two indexing scenarios are supported:

        1. Subset indexing: every event in the downstream dataset has
           exactly one corresponding event in the basin dataset (this
           could also be done with hierarchy children,
           :class:`RTDCHierarchy`).
        2. Blown indexing: several downstream events may refer to the
           same basin event, i.e. the basin dataset contains fewer
           events than the downstream dataset (e.g. a raw image
           recording series where multiple events were found in one
           frame).

        Parameters
        ----------
        ds: RTDCBase
            the basin dataset
        basinmap: np.ndarray
            1D integer indexing array that maps the events of the basin
            dataset to the downstream dataset
        """
        self.ds = ds
        self.basinmap = basinmap
        self.basins_get_dicts = ds.basins_get_dicts
        # cache of wrapped feature objects, keyed by feature name
        self._features = {}

    def __contains__(self, item):
        return item in self.ds

    def __getattr__(self, item):
        # only a fixed set of attributes is forwarded to the dataset
        if item not in BasinProxy._PASSTHROUGH:
            raise AttributeError(
                f"BasinProxy does not implement {item}")
        return getattr(self.ds, item)

    def __getitem__(self, feat):
        try:
            return self._features[feat]
        except KeyError:
            pass
        # contours are ragged and need their own wrapper
        if feat == "contour":
            wrapper = BasinProxyContour
        else:
            wrapper = BasinProxyFeature
        feat_obj = wrapper(feat_obj=self.ds[feat], basinmap=self.basinmap)
        self._features[feat] = feat_obj
        return feat_obj

    def __len__(self):
        return len(self.basinmap)
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
class BasinProxyContour:
    def __init__(self, feat_obj, basinmap):
        """Map a ragged contour feature through `basinmap` on access.

        Contours have a per-event length, so no array cache is kept;
        every access fetches the mapped event directly from `feat_obj`.
        """
        self.feat_obj = feat_obj
        self.basinmap = basinmap
        # contours are per-event 2D point lists, never scalar
        self.is_scalar = False
        # the middle axis (contour length) varies per event -> np.nan
        self.shape = (len(self.basinmap), np.nan, 2)
        self.identifier = feat_obj.identifier

    def __getattr__(self, item):
        # only the dtype attribute is forwarded to the wrapped feature
        if item == "dtype":
            return self.feat_obj.dtype
        raise AttributeError(
            f"BasinProxyContour does not implement {item}")

    def __getitem__(self, index):
        # Only single-event access is supported; slicing ragged
        # contours is not implemented.
        if not isinstance(index, numbers.Integral):
            raise NotImplementedError(
                "Cannot index contours without anything else than integers.")
        return self.feat_obj[self.basinmap[index]]

    def __len__(self):
        return self.shape[0]
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
class BasinProxyFeature(np.lib.mixins.NDArrayOperatorsMixin):
    """Array-like proxy of a basin feature, re-indexed via `basinmap`"""

    def __init__(self, feat_obj, basinmap):
        """Wrap around a feature object, mapping it upon data access

        Parameters
        ----------
        feat_obj:
            array-like feature object of the underlying dataset
        basinmap:
            integer event indices mapping this proxy onto `feat_obj`
            (assumed to be a numpy array — `shape`/`size` are used)
        """
        self.feat_obj = feat_obj
        self.basinmap = basinmap
        # Full mapped copy of the data; only ever populated for
        # scalar features (non-scalar data may be too large to cache).
        self._cache = None
        self._shape = None
        self._size = None
        # One value per event <=> 1D feature data
        self.is_scalar = bool(len(self.feat_obj.shape) == 1)

    @property
    def shape(self):
        """Shape of the mapped feature data (computed once, then cached)"""
        if self._shape is None:
            if self.is_scalar:
                self._shape = self.basinmap.shape
            else:
                self._shape = (self.basinmap.size,) + self.feat_obj.shape[1:]
        return self._shape

    @property
    def size(self):
        """Total number of elements (product of `shape`)"""
        if self._size is None:
            self._size = np.prod(self.shape)
        return self._size

    def __array__(self, dtype=None, copy=copy_if_needed, *args, **kwargs):
        """Return the fully-mapped feature data as a numpy array

        Scalar features are mapped once and cached; non-scalar features
        (images, masks, ...) are assembled event-by-event on every call.
        """
        if self.is_scalar:
            # BUGFIX: the previous condition (`self._cache is None and
            # self.is_scalar`) sent every call *after* the first one down
            # the slow per-event loop below, so the cache was only ever
            # used once. Populate the cache lazily and always serve
            # scalar data from it. The `dtype` request is now also
            # honored on the cached path.
            if self._cache is None:
                self._cache = self.feat_obj[:][self.basinmap]
            return np.array(self._cache, dtype=dtype, copy=copy)
        # This is dangerous territory in terms of memory usage
        out_arr = np.empty((len(self.basinmap),) + self.feat_obj.shape[1:],
                           dtype=dtype or self.feat_obj.dtype,
                           *args, **kwargs)
        for ii, idx in enumerate(self.basinmap):
            out_arr[ii] = self.feat_obj[idx]
        return out_arr

    def __getattr__(self, item):
        # Only `dtype` is forwarded to the wrapped feature object.
        if item in [
            "dtype",
        ]:
            return getattr(self.feat_obj, item)
        else:
            raise AttributeError(
                f"BasinProxyFeature does not implement {item}")

    def __getitem__(self, index):
        if self._cache is None and isinstance(index, numbers.Integral):
            # single index, cheap operation
            return self.feat_obj[self.basinmap[index]]
        elif not self.is_scalar:
            # image, mask, etc: assemble only the requested events
            if isinstance(index, slice) and index == slice(None):
                indices = self.basinmap
            else:
                indices = self.basinmap[index]
            out_arr = np.empty((len(indices),) + self.feat_obj.shape[1:],
                               dtype=self.feat_obj.dtype)
            for ii, idx in enumerate(indices):
                out_arr[ii] = self.feat_obj[idx]
            return out_arr
        else:
            # scalar fancy indexing; sets the cache if not already set
            return self.__array__()[index]

    def __len__(self):
        return len(self.basinmap)
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
def basin_priority_sorted_key(bdict: Dict):
    """Yield a sorting value for a given basin that can be used with `sorted`

    Basins are normally stored in random order in a dataset. This method
    brings them into correct order, prioritizing:

    - type: "file" over "remote"
    - format: "HTTP" over "S3" over "dcor"
    - mapping: "same" over anything else
    """
    # Each criterion maps to a single sort character; unknown values
    # sort last via "z".
    type_rank = {
        "internal": "a",
        "file": "b",
        "remote": "c",
    }
    format_rank = {
        "h5dataset": "a",
        "hdf5": "b",
        "http": "c",
        "s3": "d",
        "dcor": "e",
    }
    key_type = type_rank.get(bdict.get("type"), "z")
    key_format = format_rank.get(bdict.get("format"), "z")
    mapping = bdict.get("mapping", "same")  # old dicts don't have "mapping"
    key_mapping = "a" if mapping == "same" else mapping
    return f"{key_type}{key_format}{key_mapping}"
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
class InternalH5DatasetBasin(Basin):
    """Basin that references a dataset stored inside the same HDF5 file"""
    basin_format = "h5dataset"
    basin_type = "internal"

    def __init__(self, *args, **kwargs):
        """Initialize, requiring `mapping` and `features` to be set

        Raises
        ------
        ValueError
            if `mapping` is "same" or no features were specified
        """
        super(InternalH5DatasetBasin, self).__init__(*args, **kwargs)
        if self.mapping == "same":
            raise ValueError(
                "'internal' basins must be instantiated with `mapping`. "
                "If you are not doing that, then you probably don't need "
                "them.")
        if self._features is None:
            raise ValueError("You must specify features when defining "
                             "internal basins.")
        # Keep only those features that actually exist in the HDF5 file;
        # warn about the rest.
        h5root = self._basinmap_referrer().h5file
        location_exists = self.location in h5root
        kept_features = []
        for feat in self._features:
            if location_exists and feat in h5root[self.location]:
                kept_features.append(feat)
            else:
                warnings.warn(
                    f"Feature '{feat}' is defined as an internal basin, "
                    f"but it cannot be found in '{self.location}'.",
                    BasinFeatureMissingWarning)
        # In-place slice assignment keeps the original list object alive
        # for anything else that may hold a reference to it.
        self._features[:] = kept_features

    def _load_dataset(self, location, **kwargs):
        """Build an in-memory dataset from the features at `self.location`"""
        from .fmt_dict import RTDC_Dict
        # get the h5file object
        h5root = self._basinmap_referrer().h5file
        group = h5root[self.location]
        return RTDC_Dict({feat: group[feat] for feat in self.features})

    def is_available(self):
        """An internal basin is available iff any features survived init"""
        return bool(self._features)

    def verify_basin(self, *args, **kwargs):
        """It's not necessary to verify internal basins"""
        return True
|
|
755
|
+
|
|
756
|
+
|
|
757
|
+
def get_basin_classes():
    """Return a mapping of basin format name to direct `Basin` subclass

    Subclasses without a `basin_format` attribute (abstract intermediates)
    are skipped.
    """
    return {b_cls.basin_format: b_cls
            for b_cls in Basin.__subclasses__()
            if hasattr(b_cls, "basin_format")}
|