dclab 0.62.17__cp39-cp39-macosx_11_0_arm64.whl → 0.67.3__cp39-cp39-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dclab/_version.py +16 -3
- dclab/cli/task_tdms2rtdc.py +1 -1
- dclab/cli/task_verify_dataset.py +3 -3
- dclab/definitions/__init__.py +1 -1
- dclab/definitions/feat_const.py +6 -4
- dclab/definitions/feat_logic.py +27 -28
- dclab/downsampling.cpython-39-darwin.so +0 -0
- dclab/downsampling.pyx +12 -7
- dclab/external/skimage/_find_contours_cy.cpython-39-darwin.so +0 -0
- dclab/external/skimage/_pnpoly.cpython-39-darwin.so +0 -0
- dclab/external/skimage/_shared/geometry.cpython-39-darwin.so +0 -0
- dclab/features/bright.py +11 -2
- dclab/features/bright_bc.py +13 -2
- dclab/features/bright_perc.py +10 -2
- dclab/features/contour.py +12 -7
- dclab/features/emodulus/__init__.py +33 -27
- dclab/features/emodulus/load.py +8 -6
- dclab/features/emodulus/pxcorr.py +33 -15
- dclab/features/emodulus/scale_linear.py +79 -52
- dclab/features/emodulus/viscosity.py +31 -19
- dclab/features/fl_crosstalk.py +19 -10
- dclab/features/inert_ratio.py +18 -11
- dclab/features/volume.py +24 -14
- dclab/http_utils.py +1 -1
- dclab/kde/base.py +238 -14
- dclab/kde/methods.py +33 -12
- dclab/rtdc_dataset/config.py +1 -1
- dclab/rtdc_dataset/core.py +22 -8
- dclab/rtdc_dataset/export.py +171 -34
- dclab/rtdc_dataset/feat_basin.py +250 -33
- dclab/rtdc_dataset/fmt_dcor/api.py +69 -7
- dclab/rtdc_dataset/fmt_dcor/base.py +103 -4
- dclab/rtdc_dataset/fmt_dcor/logs.py +1 -1
- dclab/rtdc_dataset/fmt_dcor/tables.py +1 -1
- dclab/rtdc_dataset/fmt_hdf5/events.py +20 -1
- dclab/rtdc_dataset/fmt_hierarchy/base.py +1 -1
- dclab/rtdc_dataset/fmt_s3.py +29 -10
- dclab/rtdc_dataset/fmt_tdms/event_trace.py +1 -1
- dclab/rtdc_dataset/fmt_tdms/naming.py +1 -1
- dclab/rtdc_dataset/writer.py +43 -11
- dclab/statistics.py +27 -4
- dclab/warn.py +1 -1
- {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/METADATA +26 -4
- {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/RECORD +48 -48
- {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/WHEEL +1 -1
- {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/entry_points.txt +0 -0
- {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/licenses/LICENSE +0 -0
- {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/top_level.txt +0 -0
dclab/rtdc_dataset/feat_basin.py
CHANGED
|
@@ -6,9 +6,10 @@ which, when opened in dclab, can access features stored in the input file
|
|
|
6
6
|
from __future__ import annotations
|
|
7
7
|
|
|
8
8
|
import abc
|
|
9
|
+
import logging
|
|
9
10
|
import numbers
|
|
10
11
|
import threading
|
|
11
|
-
from typing import Dict, List, Literal
|
|
12
|
+
from typing import Callable, Dict, List, Literal, Union
|
|
12
13
|
import uuid
|
|
13
14
|
import warnings
|
|
14
15
|
import weakref
|
|
@@ -18,14 +19,25 @@ import numpy as np
|
|
|
18
19
|
from ..util import copy_if_needed
|
|
19
20
|
|
|
20
21
|
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
21
25
|
class BasinFeatureMissingWarning(UserWarning):
|
|
22
26
|
"""Used when a basin feature is defined but not stored"""
|
|
23
27
|
|
|
24
28
|
|
|
29
|
+
class BasinIdentifierMismatchError(BaseException):
|
|
30
|
+
"""Used when the identifier of a basin does not match the definition"""
|
|
31
|
+
|
|
32
|
+
|
|
25
33
|
class CyclicBasinDependencyFoundWarning(UserWarning):
|
|
26
34
|
"""Used when a basin is defined in one of its sub-basins"""
|
|
27
35
|
|
|
28
36
|
|
|
37
|
+
class IgnoringPerishableBasinTTL(UserWarning):
|
|
38
|
+
"""Used when refreshing a basin does not support TTL"""
|
|
39
|
+
|
|
40
|
+
|
|
29
41
|
class BasinmapFeatureMissingError(KeyError):
|
|
30
42
|
"""Used when one of the `basinmap` features is not defined"""
|
|
31
43
|
pass
|
|
@@ -47,6 +59,114 @@ class BasinAvailabilityChecker(threading.Thread):
|
|
|
47
59
|
self.basin.is_available()
|
|
48
60
|
|
|
49
61
|
|
|
62
|
+
class PerishableRecord:
    """A class containing information about perishable basins

    Perishable basins are basins that may discontinue to work after
    e.g. a specific amount of time (e.g. presigned S3 URLs). With the
    `PerishableRecord`, these basins may be "refreshed" (made
    available again).
    """
    def __init__(self,
                 basin,
                 expiration_func: Callable = None,
                 expiration_kwargs: Dict = None,
                 refresh_func: Callable = None,
                 refresh_kwargs: Dict = None,
                 ):
        """
        Parameters
        ----------
        basin: Basin
            Instance of the perishable basin. Only a weak proxy of it
            is stored in this record.
        expiration_func: callable
            A function that determines whether the basin has perished.
            It must accept `basin` as the first argument. Calling this
            function should be fast, as it is called every time a feature
            is accessed.
            Note that if you are implementing this in the time domain, then
            you should use `time.time()` (TSE), because you need an absolute
            time measure. `time.monotonic()` for instance does not count up
            when the system goes to sleep. However, keep in mind that if
            a remote machine dictates the expiration time, then that
            remote machine should also transmit the creation time (in case
            there are time offsets).
        expiration_kwargs: dict
            Additional kwargs for `expiration_func`.
        refresh_func: callable
            The function used to refresh the `basin`. It must accept
            `basin` as the first argument.
        refresh_kwargs: dict
            Additional kwargs for `refresh_func`
        """
        # Store a weak proxy, so that this record does not hold a strong
        # reference to the basin it describes.
        if not isinstance(basin, weakref.ProxyType):
            basin = weakref.proxy(basin)
        self.basin = basin
        self.expiration_func = expiration_func
        self.expiration_kwargs = expiration_kwargs or {}
        self.refresh_func = refresh_func
        self.refresh_kwargs = refresh_kwargs or {}

    def __repr__(self):
        # `perished` is tri-state; do not report "valid" when the
        # expiration state cannot be determined.
        perished = self.perished()
        if perished is None:
            state = "unknown"
        elif perished:
            state = "perished"
        else:
            state = "valid"
        return f"<PerishableRecord ({state}) at {hex(id(self))}>"

    def perished(self) -> Union[bool, None]:
        """Determine whether the basin has perished

        Returns
        -------
        state: bool or None
            True means the basin has perished, False means the basin
            has not perished, and `None` means we don't know
            (no `expiration_func` was specified).
        """
        if self.expiration_func is None:
            return None
        return self.expiration_func(self.basin, **self.expiration_kwargs)

    def refresh(self, extend_by: float = None) -> None:
        """Extend the lifetime of the associated perishable basin

        Parameters
        ----------
        extend_by: float
            Custom argument for extending the life of the basin.
            Normally, this would be a lifetime. The value is only
            forwarded to `refresh_func` if `refresh_kwargs` contains
            an "extend_by" entry. Otherwise, a warning of type
            `IgnoringPerishableBasinTTL` is issued and the value is
            ignored.

        Returns
        -------
        None
            Refreshing is delegated to `refresh_func`. If no
            `refresh_func` is defined, an error is logged and
            nothing else happens.
        """
        if self.refresh_func is None:
            # The basin is a perishable basin, but we have no way of
            # refreshing it.
            logger.error("Cannot refresh basin '%s'", self.basin)
            return

        # Compare against None (not truthiness), so that a user-specified
        # value of 0 is not silently passed on to a `refresh_func` that
        # does not support the "extend_by" keyword.
        if extend_by is not None and "extend_by" not in self.refresh_kwargs:
            warnings.warn(
                "Parameter 'extend_by' ignored, because the basin "
                "source does not support it",
                IgnoringPerishableBasinTTL)
            extend_by = None

        # Work on a copy so the stored default kwargs remain untouched
        rkw = dict(self.refresh_kwargs)
        if extend_by is not None:
            rkw["extend_by"] = extend_by

        self.refresh_func(self.basin, **rkw)
        logger.info("Refreshed basin '%s'", self.basin)

        # If everything went well, reset the current dataset of the basin
        if self.basin._ds is not None:
            self.basin._ds.close()
            self.basin._ds = None
|
|
168
|
+
|
|
169
|
+
|
|
50
170
|
class Basin(abc.ABC):
|
|
51
171
|
"""A basin represents data from an external source
|
|
52
172
|
|
|
@@ -60,7 +180,8 @@ class Basin(abc.ABC):
|
|
|
60
180
|
name: str = None,
|
|
61
181
|
description: str = None,
|
|
62
182
|
features: List[str] = None,
|
|
63
|
-
|
|
183
|
+
referrer_identifier: str = None,
|
|
184
|
+
basin_identifier: str = None,
|
|
64
185
|
mapping: Literal["same",
|
|
65
186
|
"basinmap0",
|
|
66
187
|
"basinmap1",
|
|
@@ -76,6 +197,7 @@ class Basin(abc.ABC):
|
|
|
76
197
|
mapping_referrer: Dict = None,
|
|
77
198
|
ignored_basins: List[str] = None,
|
|
78
199
|
key: str = None,
|
|
200
|
+
perishable=False,
|
|
79
201
|
**kwargs):
|
|
80
202
|
"""
|
|
81
203
|
|
|
@@ -91,10 +213,18 @@ class Basin(abc.ABC):
|
|
|
91
213
|
features: list of str
|
|
92
214
|
List of features this basin provides; This list is enforced,
|
|
93
215
|
even if the basin actually contains more features.
|
|
94
|
-
|
|
216
|
+
referrer_identifier: str
|
|
95
217
|
A measurement identifier against which to check the basin.
|
|
218
|
+
If the basin mapping is "same", then this must match the
|
|
219
|
+
identifier of the basin exactly, otherwise it must start
|
|
220
|
+
with the basin identifier (e.g. "basin-id_referrer-sub-id").
|
|
96
221
|
If this is set to None (default), there is no certainty
|
|
97
222
|
that the downstream dataset is from the same measurement.
|
|
223
|
+
basin_identifier: str
|
|
224
|
+
A measurement identifier that must match the basin exactly.
|
|
225
|
+
In contrast to `referrer_identifier`, the basin identifier is
|
|
226
|
+
the identifier of the basin file. If `basin_identifier` is
|
|
227
|
+
specified, the identifier of the basin must be identical to it.
|
|
98
228
|
mapping: str
|
|
99
229
|
Which type of mapping to use. This can be either "same"
|
|
100
230
|
when the event list of the basin is identical to that
|
|
@@ -115,6 +245,10 @@ class Basin(abc.ABC):
|
|
|
115
245
|
Unique key to identify this basin; normally computed from
|
|
116
246
|
a JSON dump of the basin definition. A random string is used
|
|
117
247
|
if None is specified.
|
|
248
|
+
perishable: bool or PerishableRecord
|
|
249
|
+
If this is not False, then it must be a :class:`.PerishableRecord`
|
|
250
|
+
that holds the information about the expiration time, and that
|
|
251
|
+
comes with a method `refresh` to extend the lifetime of the basin.
|
|
118
252
|
kwargs:
|
|
119
253
|
Additional keyword arguments passed to the `load_dataset`
|
|
120
254
|
method of the `Basin` subclass.
|
|
@@ -130,13 +264,19 @@ class Basin(abc.ABC):
|
|
|
130
264
|
self.name = name
|
|
131
265
|
#: lengthy description of the basin
|
|
132
266
|
self.description = description
|
|
133
|
-
#
|
|
267
|
+
# perishable record
|
|
268
|
+
if isinstance(perishable, bool) and perishable:
|
|
269
|
+
# Create an empty perishable record
|
|
270
|
+
perishable = PerishableRecord(self)
|
|
271
|
+
self.perishable = perishable
|
|
272
|
+
# define key of the basin
|
|
134
273
|
self.key = key or str(uuid.uuid4())
|
|
135
274
|
# features this basin provides
|
|
136
275
|
self._features = features
|
|
137
276
|
#: measurement identifier of the referencing dataset
|
|
138
|
-
self.
|
|
139
|
-
self.
|
|
277
|
+
self.referrer_identifier = referrer_identifier
|
|
278
|
+
self.basin_identifier = basin_identifier or None
|
|
279
|
+
self._identifiers_verification = None
|
|
140
280
|
#: ignored basins
|
|
141
281
|
self.ignored_basins = ignored_basins or []
|
|
142
282
|
#: additional keyword arguments passed to the basin
|
|
@@ -164,23 +304,28 @@ class Basin(abc.ABC):
|
|
|
164
304
|
self._av_check.start()
|
|
165
305
|
|
|
166
306
|
def __repr__(self):
|
|
307
|
+
try:
|
|
308
|
+
feature_info = len(self.features)
|
|
309
|
+
except BaseException:
|
|
310
|
+
feature_info = "unknown"
|
|
167
311
|
options = [
|
|
168
312
|
self.name,
|
|
169
313
|
f"mapped {self.mapping}" if self.mapping != "same" else "",
|
|
170
|
-
f"
|
|
314
|
+
f"{feature_info} features",
|
|
171
315
|
f"location {self.location}",
|
|
172
316
|
]
|
|
173
317
|
opt_str = ", ".join([o for o in options if o])
|
|
174
318
|
|
|
175
319
|
return f"<{self.__class__.__name__} ({opt_str}) at {hex(id(self))}>"
|
|
176
320
|
|
|
177
|
-
def
|
|
321
|
+
def _assert_referrer_identifier(self):
|
|
178
322
|
"""Make sure the basin matches the measurement identifier
|
|
179
323
|
"""
|
|
180
324
|
if not self.verify_basin(run_identifier=True):
|
|
181
|
-
raise
|
|
182
|
-
|
|
183
|
-
|
|
325
|
+
raise BasinIdentifierMismatchError(
|
|
326
|
+
f"Measurement identifier of basin {self.ds} "
|
|
327
|
+
f"({self.get_measurement_identifier()}) does "
|
|
328
|
+
f"not match {self.referrer_identifier}!")
|
|
184
329
|
|
|
185
330
|
@property
|
|
186
331
|
def basinmap(self):
|
|
@@ -220,6 +365,10 @@ class Basin(abc.ABC):
|
|
|
220
365
|
@property
|
|
221
366
|
def ds(self):
|
|
222
367
|
"""The :class:`.RTDCBase` instance represented by the basin"""
|
|
368
|
+
if self.perishable and self.perishable.perished():
|
|
369
|
+
# We have perished. Ask the PerishableRecord to refresh this
|
|
370
|
+
# basin so we can access it again.
|
|
371
|
+
self.perishable.refresh()
|
|
223
372
|
if self._ds is None:
|
|
224
373
|
if not self.is_available():
|
|
225
374
|
raise BasinNotAvailableError(f"Basin {self} is not available!")
|
|
@@ -265,6 +414,7 @@ class Basin(abc.ABC):
|
|
|
265
414
|
"basin_descr": self.description,
|
|
266
415
|
"basin_feats": self.features,
|
|
267
416
|
"basin_map": self.basinmap,
|
|
417
|
+
"perishable": bool(self.perishable),
|
|
268
418
|
}
|
|
269
419
|
|
|
270
420
|
def close(self):
|
|
@@ -275,7 +425,7 @@ class Basin(abc.ABC):
|
|
|
275
425
|
|
|
276
426
|
def get_feature_data(self, feat):
|
|
277
427
|
"""Return an object representing feature data of the basin"""
|
|
278
|
-
self.
|
|
428
|
+
self._assert_referrer_identifier()
|
|
279
429
|
return self.ds[feat]
|
|
280
430
|
|
|
281
431
|
def get_measurement_identifier(self):
|
|
@@ -320,26 +470,44 @@ class Basin(abc.ABC):
|
|
|
320
470
|
# Only check for run identifier if requested and if the availability
|
|
321
471
|
# check did not fail.
|
|
322
472
|
if run_identifier and check_avail:
|
|
323
|
-
if
|
|
324
|
-
|
|
473
|
+
if self._identifiers_verification is None:
|
|
474
|
+
# This is the measurement identifier of the basin.
|
|
475
|
+
basin_identifier = self.get_measurement_identifier()
|
|
476
|
+
|
|
477
|
+
# Perform a sanity check for the basin identifier.
|
|
478
|
+
if (self.basin_identifier
|
|
479
|
+
and self.basin_identifier != basin_identifier):
|
|
480
|
+
# We should not proceed any further with this basin.
|
|
481
|
+
self._identifiers_verification = False
|
|
482
|
+
warnings.warn(
|
|
483
|
+
f"Basin identifier mismatch for {self}. Expected "
|
|
484
|
+
f"'{self.basin_identifier}', got '{basin_identifier}'")
|
|
485
|
+
|
|
486
|
+
if self.referrer_identifier is None:
|
|
325
487
|
# No measurement identifier was presented by the
|
|
326
|
-
# referencing dataset.
|
|
327
|
-
|
|
488
|
+
# referencing dataset. We are in the dark.
|
|
489
|
+
# Don't perform any checks.
|
|
490
|
+
self._identifiers_verification = True
|
|
328
491
|
else:
|
|
329
|
-
if
|
|
330
|
-
#
|
|
331
|
-
# identifier
|
|
332
|
-
|
|
492
|
+
if basin_identifier is None:
|
|
493
|
+
# Again, we are in the dark, because the basin dataset
|
|
494
|
+
# does not have an identifier. This is an undesirable
|
|
495
|
+
# situation, but there is nothing we can do about it.
|
|
496
|
+
self._identifiers_verification = True
|
|
333
497
|
else:
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
498
|
+
if self.mapping == "same":
|
|
499
|
+
# When we have identical mapping, then the
|
|
500
|
+
# measurement identifier has to match exactly.
|
|
501
|
+
verifier = str.__eq__
|
|
502
|
+
else:
|
|
503
|
+
# When we have non-identical mapping (e.g. exported
|
|
504
|
+
# data), then the measurement identifier has to
|
|
505
|
+
# partially match.
|
|
506
|
+
verifier = str.startswith
|
|
507
|
+
self._identifiers_verification = verifier(
|
|
508
|
+
self.referrer_identifier, basin_identifier)
|
|
509
|
+
|
|
510
|
+
check_rid = self._identifiers_verification
|
|
343
511
|
else:
|
|
344
512
|
check_rid = True
|
|
345
513
|
|
|
@@ -403,8 +571,12 @@ class BasinProxy:
|
|
|
403
571
|
|
|
404
572
|
def __getitem__(self, feat):
|
|
405
573
|
if feat not in self._features:
|
|
406
|
-
|
|
407
|
-
|
|
574
|
+
if feat == "contour":
|
|
575
|
+
feat_obj = BasinProxyContour(feat_obj=self.ds[feat],
|
|
576
|
+
basinmap=self.basinmap)
|
|
577
|
+
else:
|
|
578
|
+
feat_obj = BasinProxyFeature(feat_obj=self.ds[feat],
|
|
579
|
+
basinmap=self.basinmap)
|
|
408
580
|
self._features[feat] = feat_obj
|
|
409
581
|
return self._features[feat]
|
|
410
582
|
|
|
@@ -412,14 +584,61 @@ class BasinProxy:
|
|
|
412
584
|
return len(self.basinmap)
|
|
413
585
|
|
|
414
586
|
|
|
587
|
+
class BasinProxyContour:
    """Index-mapped access to the contour feature of a basin"""
    def __init__(self, feat_obj, basinmap):
        """Wrap around a contour, mapping it upon data access, no caching

        Parameters
        ----------
        feat_obj:
            Contour feature object of the basin; it is indexed with
            integers and must provide an `identifier` attribute.
        basinmap:
            Sequence of indices; entry `i` is the index in `feat_obj`
            that is returned when this object is indexed with `i`.
        """
        self.feat_obj = feat_obj
        self.basinmap = basinmap
        self.is_scalar = False
        # The second dimension is set to NaN (presumably because the
        # number of contour points differs from event to event).
        self.shape = (len(self.basinmap), np.nan, 2)
        self.identifier = feat_obj.identifier

    def __getattr__(self, item):
        # Only called for attributes that are not found the regular way;
        # forward a small set of attributes to the wrapped object.
        if item in ("dtype",):
            return getattr(self.feat_obj, item)
        raise AttributeError(
            f"BasinProxyContour does not implement {item}")

    def __getitem__(self, index):
        """Return the mapped contour for a single integer `index`

        Raises
        ------
        NotImplementedError
            If `index` is not an integer (e.g. a slice or an array).
        """
        if isinstance(index, numbers.Integral):
            # single index, cheap operation
            return self.feat_obj[self.basinmap[index]]
        raise NotImplementedError(
            "Cannot index contours with anything other than integers.")

    def __len__(self):
        return self.shape[0]
|
|
615
|
+
|
|
616
|
+
|
|
415
617
|
class BasinProxyFeature(np.lib.mixins.NDArrayOperatorsMixin):
|
|
416
618
|
def __init__(self, feat_obj, basinmap):
|
|
417
619
|
"""Wrap around a feature object, mapping it upon data access"""
|
|
418
620
|
self.feat_obj = feat_obj
|
|
419
621
|
self.basinmap = basinmap
|
|
420
622
|
self._cache = None
|
|
623
|
+
self._shape = None
|
|
624
|
+
self._size = None
|
|
421
625
|
self.is_scalar = bool(len(self.feat_obj.shape) == 1)
|
|
422
626
|
|
|
627
|
+
@property
def shape(self):
    """Shape of the mapped feature, computed on first access

    For scalar features this is the shape of `basinmap`; otherwise
    the first dimension is the size of `basinmap` and the remaining
    dimensions are taken from the wrapped feature object.
    """
    cached = self._shape
    if cached is None:
        if self.is_scalar:
            cached = self.basinmap.shape
        else:
            cached = (self.basinmap.size,) + self.feat_obj.shape[1:]
        # remember the result for subsequent calls
        self._shape = cached
    return cached
|
|
635
|
+
|
|
636
|
+
@property
def size(self):
    """Product of all entries of `shape`, computed on first access"""
    total = self._size
    if total is None:
        total = np.prod(self.shape)
        # remember the result for subsequent calls
        self._size = total
    return total
|
|
641
|
+
|
|
423
642
|
def __array__(self, dtype=None, copy=copy_if_needed, *args, **kwargs):
|
|
424
643
|
if self._cache is None and self.is_scalar:
|
|
425
644
|
self._cache = self.feat_obj[:][self.basinmap]
|
|
@@ -436,8 +655,6 @@ class BasinProxyFeature(np.lib.mixins.NDArrayOperatorsMixin):
|
|
|
436
655
|
def __getattr__(self, item):
|
|
437
656
|
if item in [
|
|
438
657
|
"dtype",
|
|
439
|
-
"shape",
|
|
440
|
-
"size",
|
|
441
658
|
]:
|
|
442
659
|
return getattr(self.feat_obj, item)
|
|
443
660
|
else:
|
|
@@ -11,7 +11,9 @@ class DCORAccessError(BaseException):
|
|
|
11
11
|
|
|
12
12
|
class APIHandler:
|
|
13
13
|
"""Handles the DCOR api with caching for simple queries"""
|
|
14
|
-
#:
|
|
14
|
+
#: These are cached to minimize network usage
|
|
15
|
+
#: Note that we are not caching basins, since they may contain
|
|
16
|
+
#: expiring URLs.
|
|
15
17
|
cache_queries = ["metadata", "size", "feature_list", "valid"]
|
|
16
18
|
#: DCOR API Keys/Tokens in the current session
|
|
17
19
|
api_keys = []
|
|
@@ -52,8 +54,36 @@ class APIHandler:
|
|
|
52
54
|
if api_key.strip() and api_key not in APIHandler.api_keys:
|
|
53
55
|
APIHandler.api_keys.append(api_key)
|
|
54
56
|
|
|
55
|
-
def _get(self,
|
|
56
|
-
|
|
57
|
+
def _get(self,
|
|
58
|
+
query: str,
|
|
59
|
+
feat: str = None,
|
|
60
|
+
trace: str = None,
|
|
61
|
+
event: str = None,
|
|
62
|
+
api_key: str = "",
|
|
63
|
+
timeout: float = None,
|
|
64
|
+
retries: int = 5):
|
|
65
|
+
"""Fetch information via the DCOR API
|
|
66
|
+
|
|
67
|
+
Parameters
|
|
68
|
+
----------
|
|
69
|
+
query: str
|
|
70
|
+
API route
|
|
71
|
+
feat: str
|
|
72
|
+
DEPRECATED (use basins instead), adds f"&feature={feat}" to query
|
|
73
|
+
trace: str
|
|
74
|
+
DEPRECATED (use basins instead), adds f"&trace={trace}" to query
|
|
75
|
+
event: str
|
|
76
|
+
DEPRECATED (use basins instead), adds f"&event={event}" to query
|
|
77
|
+
api_key: str
|
|
78
|
+
DCOR API token to use
|
|
79
|
+
timeout: float
|
|
80
|
+
Request timeout
|
|
81
|
+
retries: int
|
|
82
|
+
Number of retries to fetch the request. For every retry, the
|
|
83
|
+
timeout is increased by two seconds.
|
|
84
|
+
"""
|
|
85
|
+
if timeout is None:
|
|
86
|
+
timeout = 1
|
|
57
87
|
# "version=2" introduced in dclab 0.54.3
|
|
58
88
|
# (supported since ckanext.dc_serve 0.13.2)
|
|
59
89
|
qstr = f"&version={self.dcserv_api_version}&query={query}"
|
|
@@ -65,13 +95,13 @@ class APIHandler:
|
|
|
65
95
|
qstr += f"&event={event}"
|
|
66
96
|
apicall = self.url + qstr
|
|
67
97
|
fail_reasons = []
|
|
68
|
-
for
|
|
98
|
+
for ii in range(retries):
|
|
69
99
|
try:
|
|
70
100
|
# try-except both requests and json conversion
|
|
71
101
|
req = self.session.get(apicall,
|
|
72
102
|
headers={"Authorization": api_key},
|
|
73
103
|
verify=self.verify,
|
|
74
|
-
timeout=
|
|
104
|
+
timeout=timeout + ii * 2,
|
|
75
105
|
)
|
|
76
106
|
jreq = req.json()
|
|
77
107
|
except requests.urllib3.exceptions.ConnectionError: # requests
|
|
@@ -92,13 +122,45 @@ class APIHandler:
|
|
|
92
122
|
f"Messages: {fail_reasons}")
|
|
93
123
|
return jreq
|
|
94
124
|
|
|
95
|
-
def get(self,
|
|
125
|
+
def get(self,
|
|
126
|
+
query: str,
|
|
127
|
+
feat: str = None,
|
|
128
|
+
trace: str = None,
|
|
129
|
+
event: str = None,
|
|
130
|
+
timeout: float = None,
|
|
131
|
+
retries: int = 5,
|
|
132
|
+
):
|
|
133
|
+
"""Fetch information from DCOR
|
|
134
|
+
|
|
135
|
+
Parameters
|
|
136
|
+
----------
|
|
137
|
+
query: str
|
|
138
|
+
API route
|
|
139
|
+
feat: str
|
|
140
|
+
DEPRECATED (use basins instead), adds f"&feature={feat}" to query
|
|
141
|
+
trace: str
|
|
142
|
+
DEPRECATED (use basins instead), adds f"&trace={trace}" to query
|
|
143
|
+
event: str
|
|
144
|
+
DEPRECATED (use basins instead), adds f"&event={event}" to query
|
|
145
|
+
timeout: float
|
|
146
|
+
Request timeout
|
|
147
|
+
retries: int
|
|
148
|
+
Number of retries to fetch the request. For every retry, the
|
|
149
|
+
timeout is increased by two seconds.
|
|
150
|
+
"""
|
|
96
151
|
if query in APIHandler.cache_queries and query in self._cache:
|
|
97
152
|
result = self._cache[query]
|
|
98
153
|
else:
|
|
99
154
|
req = {"error": {"message": "No access to API (api key?)"}}
|
|
100
155
|
for api_key in [self.api_key] + APIHandler.api_keys:
|
|
101
|
-
req = self._get(query,
|
|
156
|
+
req = self._get(query=query,
|
|
157
|
+
feat=feat,
|
|
158
|
+
trace=trace,
|
|
159
|
+
event=event,
|
|
160
|
+
api_key=api_key,
|
|
161
|
+
timeout=timeout,
|
|
162
|
+
retries=retries,
|
|
163
|
+
)
|
|
102
164
|
if req["success"]:
|
|
103
165
|
self.api_key = api_key # remember working key
|
|
104
166
|
break
|
|
@@ -1,17 +1,23 @@
|
|
|
1
1
|
"""DCOR client interface"""
|
|
2
|
+
import logging
|
|
2
3
|
import pathlib
|
|
3
4
|
import re
|
|
5
|
+
import time
|
|
4
6
|
|
|
5
7
|
from ...util import hashobj
|
|
6
8
|
|
|
7
9
|
from ..config import Configuration
|
|
8
10
|
from ..core import RTDCBase
|
|
11
|
+
from ..feat_basin import PerishableRecord
|
|
9
12
|
|
|
10
13
|
from . import api
|
|
11
14
|
from .logs import DCORLogs
|
|
12
15
|
from .tables import DCORTables
|
|
13
16
|
|
|
14
17
|
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
15
21
|
#: Append directories here where dclab should look for certificate bundles
|
|
16
22
|
#: for a specific host. The directory should contain files named after the
|
|
17
23
|
#: hostname, e.g. "dcor.mpl.mpg.de.cert".
|
|
@@ -73,6 +79,8 @@ class RTDC_DCOR(RTDCBase):
|
|
|
73
79
|
super(RTDC_DCOR, self).__init__(*args, **kwargs)
|
|
74
80
|
|
|
75
81
|
self._hash = None
|
|
82
|
+
self._cache_basin_dict = None
|
|
83
|
+
self.cache_basin_dict_time = 600
|
|
76
84
|
self.path = RTDC_DCOR.get_full_url(url, use_ssl, host)
|
|
77
85
|
|
|
78
86
|
if cert_path is None:
|
|
@@ -161,15 +169,106 @@ class RTDC_DCOR(RTDCBase):
|
|
|
161
169
|
new_url = f"{scheme}://{netloc}/{api_path}"
|
|
162
170
|
return new_url
|
|
163
171
|
|
|
164
|
-
def
|
|
165
|
-
"""
|
|
172
|
+
def _basin_refresh(self, basin):
    """Refresh location and expiration information of `basin`

    The basin dictionary with the same name as `basin` is looked up
    in the list returned by `basins_get_dicts`. The perishable record
    of the basin is created (if `basin.perishable` is still a bool) or
    updated, and `basin.location` is set to the first URL listed.

    Raises
    ------
    ValueError
        If no basin dictionary with the name of `basin` exists.
    """
    # Retrieve the basin dictionary from DCOR
    bn_dict = next(
        (cand for cand in self.basins_get_dicts()
         if cand.get("name") == basin.name),
        None)
    if bn_dict is None:
        raise ValueError(f"Basin '{basin.name}' not found in {self}")

    time_request = bn_dict["time_request"]
    time_expiration = bn_dict["time_expiration"]
    # remember time relative to time.time, subtract 30s to be on safe side
    time_local_expiration = (bn_dict["time_local_request"]
                             + (time_expiration - time_request) - 30)
    expiration_kwargs = {"time_local_expiration": time_local_expiration}

    if not isinstance(basin.perishable, bool):
        logger.debug("Refreshing basin perishable %s", basin.name)
        # only update (this also works with weakref.ProxyType)
        basin.perishable.expiration_kwargs = expiration_kwargs
    else:
        logger.debug("Initializing basin perishable %s", basin.name)
        # create a perishable record
        basin.perishable = PerishableRecord(
            basin=basin,
            expiration_func=self._basin_expiration,
            expiration_kwargs=expiration_kwargs,
            refresh_func=self._basin_refresh,
        )

    urls = bn_dict["urls"]
    if len(urls) > 1:
        logger.warning(f"Basin {basin.name} has multiple URLs. I am not "
                       f"checking their availability: {bn_dict}")
    basin.location = urls[0]
|
|
205
|
+
|
|
206
|
+
def _basin_expiration(self, basin, time_local_expiration):
|
|
207
|
+
"""Check whether the basin has perished"""
|
|
208
|
+
return time_local_expiration < time.time()
|
|
209
|
+
|
|
210
|
+
def _basins_get_dicts(self):
    """Fetch the basin dictionaries via the "basins" API query

    Basin dictionaries with format "http" that carry no "perishable"
    key are inspected for a trailing "expires=<digits>" in their URLs.
    If one is found, the dictionary is marked as perishable, the
    request and expiration times are filled in, and a part of the
    remaining URL is appended to the basin name. For every perishable
    basin, the local request time ("time_local_request") is recorded.

    Returns
    -------
    basin_dicts: list of dict
        List of basin dictionaries; an empty list if the API call
        raised `api.DCORAccessError`.
    """
    # Compile the pattern once instead of once per basin. At least one
    # digit is required, so that a URL ending in "expires=" cannot cause
    # `int("")` to raise a ValueError below.
    expires_regexp = re.compile(r".*expires=([0-9]+)$")
    try:
        basin_dicts = self.api.get(query="basins")
        # Fill in missing timing information
        for bn_dict in basin_dicts:
            if (bn_dict.get("format") == "http"
                    and "perishable" not in bn_dict):
                # We are communicating with an older version of
                # ckanext-dc_serve. Take a look at the URL and check
                # whether we have a perishable (~1 hour) URL or whether
                # this is a public resource.
                for url in bn_dict.get("urls", []):
                    if match := expires_regexp.match(url.lower()):
                        logger.debug("Detected perishable basin: %s",
                                     bn_dict["name"])
                        bn_dict["perishable"] = True
                        bn_dict["time_request"] = time.time()
                        bn_dict["time_expiration"] = int(match.group(1))
                        # add part of the resource ID to the name
                        infourl = url.split(bn_dict["name"], 1)[-1]
                        infourl = infourl.replace("/", "")
                        bn_dict["name"] += f"-{infourl[:5]}"
                        break
                else:
                    # none of the URLs carries an expiration time
                    bn_dict["perishable"] = False
            # If we have a perishable basin, add the local request time
            if bn_dict.get("perishable"):
                bn_dict["time_local_request"] = time.time()
    except api.DCORAccessError:
        # TODO: Do not catch this exception when all DCOR instances
        #  implement the 'basins' query.
        # This means that the server does not implement the 'basins' query.
        basin_dicts = []
    return basin_dicts
|
|
245
|
+
|
|
246
|
+
def basins_get_dicts(self):
    """Return list of dicts for all basins defined on DCOR

    The list is obtained from `_basins_get_dicts` and stored together
    with the time of retrieval. The stored list is reused until it is
    older than `cache_basin_dict_time` seconds (set to 600, i.e.
    10 minutes, in `__init__`).
    """
    cache = self._cache_basin_dict
    if cache is not None:
        expired = time.time() > (cache[1] + self.cache_basin_dict_time)
        if not expired:
            return cache[0]
    # nothing cached yet or the cached list is too old
    self._cache_basin_dict = (self._basins_get_dicts(), time.time())
    return self._cache_basin_dict[0]
|
|
257
|
+
|
|
258
|
+
def basins_retrieve(self):
    """Same as superclass, but add perishable information

    Every basin returned by the superclass is matched by name against
    the dictionaries from `basins_get_dicts`. If the first matching
    dictionary is flagged as perishable, a perishable record is set up
    for that basin via `_basin_refresh`.
    """
    basin_dicts = self.basins_get_dicts()
    retrieved = super(RTDC_DCOR, self).basins_retrieve()
    for basin in retrieved:
        # only the first dictionary with a matching name is considered
        found = next(
            (cand for cand in basin_dicts
             if basin.name == cand.get("name")),
            None)
        # Determine whether we have to set a perishable record.
        if found is not None and found.get("perishable"):
            # required for `_basin_refresh` to create a record
            basin.perishable = True
            # create the actual record
            self._basin_refresh(basin)
    return retrieved
|
|
174
273
|
|
|
175
274
|
|