dclab 0.62.17__cp39-cp39-macosx_11_0_arm64.whl → 0.67.3__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. dclab/_version.py +16 -3
  2. dclab/cli/task_tdms2rtdc.py +1 -1
  3. dclab/cli/task_verify_dataset.py +3 -3
  4. dclab/definitions/__init__.py +1 -1
  5. dclab/definitions/feat_const.py +6 -4
  6. dclab/definitions/feat_logic.py +27 -28
  7. dclab/downsampling.cpython-39-darwin.so +0 -0
  8. dclab/downsampling.pyx +12 -7
  9. dclab/external/skimage/_find_contours_cy.cpython-39-darwin.so +0 -0
  10. dclab/external/skimage/_pnpoly.cpython-39-darwin.so +0 -0
  11. dclab/external/skimage/_shared/geometry.cpython-39-darwin.so +0 -0
  12. dclab/features/bright.py +11 -2
  13. dclab/features/bright_bc.py +13 -2
  14. dclab/features/bright_perc.py +10 -2
  15. dclab/features/contour.py +12 -7
  16. dclab/features/emodulus/__init__.py +33 -27
  17. dclab/features/emodulus/load.py +8 -6
  18. dclab/features/emodulus/pxcorr.py +33 -15
  19. dclab/features/emodulus/scale_linear.py +79 -52
  20. dclab/features/emodulus/viscosity.py +31 -19
  21. dclab/features/fl_crosstalk.py +19 -10
  22. dclab/features/inert_ratio.py +18 -11
  23. dclab/features/volume.py +24 -14
  24. dclab/http_utils.py +1 -1
  25. dclab/kde/base.py +238 -14
  26. dclab/kde/methods.py +33 -12
  27. dclab/rtdc_dataset/config.py +1 -1
  28. dclab/rtdc_dataset/core.py +22 -8
  29. dclab/rtdc_dataset/export.py +171 -34
  30. dclab/rtdc_dataset/feat_basin.py +250 -33
  31. dclab/rtdc_dataset/fmt_dcor/api.py +69 -7
  32. dclab/rtdc_dataset/fmt_dcor/base.py +103 -4
  33. dclab/rtdc_dataset/fmt_dcor/logs.py +1 -1
  34. dclab/rtdc_dataset/fmt_dcor/tables.py +1 -1
  35. dclab/rtdc_dataset/fmt_hdf5/events.py +20 -1
  36. dclab/rtdc_dataset/fmt_hierarchy/base.py +1 -1
  37. dclab/rtdc_dataset/fmt_s3.py +29 -10
  38. dclab/rtdc_dataset/fmt_tdms/event_trace.py +1 -1
  39. dclab/rtdc_dataset/fmt_tdms/naming.py +1 -1
  40. dclab/rtdc_dataset/writer.py +43 -11
  41. dclab/statistics.py +27 -4
  42. dclab/warn.py +1 -1
  43. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/METADATA +26 -4
  44. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/RECORD +48 -48
  45. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/WHEEL +1 -1
  46. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/entry_points.txt +0 -0
  47. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/licenses/LICENSE +0 -0
  48. {dclab-0.62.17.dist-info → dclab-0.67.3.dist-info}/top_level.txt +0 -0
@@ -6,9 +6,10 @@ which, when opened in dclab, can access features stored in the input file
6
6
  from __future__ import annotations
7
7
 
8
8
  import abc
9
+ import logging
9
10
  import numbers
10
11
  import threading
11
- from typing import Dict, List, Literal
12
+ from typing import Callable, Dict, List, Literal, Union
12
13
  import uuid
13
14
  import warnings
14
15
  import weakref
@@ -18,14 +19,25 @@ import numpy as np
18
19
  from ..util import copy_if_needed
19
20
 
20
21
 
22
+ logger = logging.getLogger(__name__)
23
+
24
+
21
25
  class BasinFeatureMissingWarning(UserWarning):
22
26
  """Used when a badin feature is defined but not stored"""
23
27
 
24
28
 
29
+ class BasinIdentifierMismatchError(BaseException):
30
+ """Used when the identifier of a basin does not match the definition"""
31
+
32
+
25
33
  class CyclicBasinDependencyFoundWarning(UserWarning):
26
34
  """Used when a basin is defined in one of its sub-basins"""
27
35
 
28
36
 
37
+ class IgnoringPerishableBasinTTL(UserWarning):
38
+ """Used when refreshing a basin does not support TTL"""
39
+
40
+
29
41
  class BasinmapFeatureMissingError(KeyError):
30
42
  """Used when one of the `basinmap` features is not defined"""
31
43
  pass
@@ -47,6 +59,114 @@ class BasinAvailabilityChecker(threading.Thread):
47
59
  self.basin.is_available()
48
60
 
49
61
 
62
+ class PerishableRecord:
63
+ """A class containing information about perishable basins
64
+
65
+ Perishable basins are basins that may discontinue to work after
66
+ e.g. a specific amount of time (e.g. presigned S3 URLs). With the
67
+ `PerishableRecord`, these basins may be "refreshed" (made
68
+ available again).
69
+ """
70
+ def __init__(self,
71
+ basin,
72
+ expiration_func: Callable = None,
73
+ expiration_kwargs: Dict = None,
74
+ refresh_func: Callable = None,
75
+ refresh_kwargs: Dict = None,
76
+ ):
77
+ """
78
+ Parameters
79
+ ----------
80
+ basin: Basin
81
+ Instance of the perishable basin
82
+ expiration_func: callable
83
+ A function that determines whether the basin has perished.
84
+ It must accept `basin` as the first argument. Calling this
85
+ function should be fast, as it is called every time a feature
86
+ is accessed.
87
+ Note that if you are implementing this in the time domain, then
88
+ you should use `time.time()` (TSE), because you need an absolute
89
+ time measure. `time.monotonic()` for instance does not count up
90
+ when the system goes to sleep. However, keep in mind that if
91
+ a remote machine dictates the expiration time, then that
92
+ remote machine should also transmit the creation time (in case
93
+ there are time offsets).
94
+ expiration_kwargs: dict
95
+ Additional kwargs for `expiration_func`.
96
+ refresh_func: callable
97
+ The function used to refresh the `basin`. It must accept
98
+ `basin` as the first argument.
99
+ refresh_kwargs: dict
100
+ Additional kwargs for `refresh_func`
101
+ """
102
+ if not isinstance(basin, weakref.ProxyType):
103
+ basin = weakref.proxy(basin)
104
+ self.basin = basin
105
+ self.expiration_func = expiration_func
106
+ self.expiration_kwargs = expiration_kwargs or {}
107
+ self.refresh_func = refresh_func
108
+ self.refresh_kwargs = refresh_kwargs or {}
109
+
110
+ def __repr__(self):
111
+ state = "perished" if self.perished() else "valid"
112
+ return f"<PerishableRecord ({state}) at {hex(id(self))}>"
113
+
114
+ def perished(self) -> Union[bool, None]:
115
+ """Determine whether the basin has perished
116
+
117
+ Returns
118
+ -------
119
+ state: bool or None
120
+ True means the basin has perished, False means the basin
121
+ has not perished, and `None` means we don't know
122
+ """
123
+ if self.expiration_func is None:
124
+ return None
125
+ else:
126
+ return self.expiration_func(self.basin, **self.expiration_kwargs)
127
+
128
+ def refresh(self, extend_by: float = None) -> None:
129
+ """Extend the lifetime of the associated perishable basin
130
+
131
+ Parameters
132
+ ----------
133
+ extend_by: float
134
+ Custom argument for extending the life of the basin.
135
+ Normally, this would be a lifetime.
136
+
137
+ Returns
138
+ -------
139
+ basin: dict | None
140
+ Dictionary for instantiating a new basin
141
+ """
142
+ if self.refresh_func is None:
143
+ # The basin is a perishable basin, but we have no way of
144
+ # refreshing it.
145
+ logger.error(f"Cannot refresh basin '{self.basin}'")
146
+ return
147
+
148
+ if extend_by and "extend_by" not in self.refresh_kwargs:
149
+ warnings.warn(
150
+ "Parameter 'extend_by' ignored, because the basin "
151
+ "source does not support it",
152
+ IgnoringPerishableBasinTTL)
153
+ extend_by = None
154
+
155
+ rkw = {}
156
+ rkw.update(self.refresh_kwargs)
157
+
158
+ if extend_by is not None:
159
+ rkw["extend_by"] = extend_by
160
+
161
+ self.refresh_func(self.basin, **rkw)
162
+ logger.info(f"Refreshed basin '{self.basin}'")
163
+
164
+ # If everything went well, reset the current dataset of the basin
165
+ if self.basin._ds is not None:
166
+ self.basin._ds.close()
167
+ self.basin._ds = None
168
+
169
+
50
170
  class Basin(abc.ABC):
51
171
  """A basin represents data from an external source
52
172
 
@@ -60,7 +180,8 @@ class Basin(abc.ABC):
60
180
  name: str = None,
61
181
  description: str = None,
62
182
  features: List[str] = None,
63
- measurement_identifier: str = None,
183
+ referrer_identifier: str = None,
184
+ basin_identifier: str = None,
64
185
  mapping: Literal["same",
65
186
  "basinmap0",
66
187
  "basinmap1",
@@ -76,6 +197,7 @@ class Basin(abc.ABC):
76
197
  mapping_referrer: Dict = None,
77
198
  ignored_basins: List[str] = None,
78
199
  key: str = None,
200
+ perishable=False,
79
201
  **kwargs):
80
202
  """
81
203
 
@@ -91,10 +213,18 @@ class Basin(abc.ABC):
91
213
  features: list of str
92
214
  List of features this basin provides; This list is enforced,
93
215
  even if the basin actually contains more features.
94
- measurement_identifier: str
216
+ referrer_identifier: str
95
217
  A measurement identifier against which to check the basin.
218
+ If the basin mapping is "same", then this must match the
219
+ identifier of the basin exactly, otherwise it must start
220
+ with the basin identifier (e.g. "basin-id_referrer-sub-id").
96
221
  If this is set to None (default), there is no certainty
97
222
  that the downstream dataset is from the same measurement.
223
+ basin_identifier: str
224
+ A measurement identifier that must match the basin exactly.
225
+ In contrast to `referrer_identifier`, the basin identifier is
226
+ the identifier of the basin file. If `basin_identifier` is
227
+ specified, the identifier of the basin must be identical to it.
98
228
  mapping: str
99
229
  Which type of mapping to use. This can be either "same"
100
230
  when the event list of the basin is identical to that
@@ -115,6 +245,10 @@ class Basin(abc.ABC):
115
245
  Unique key to identify this basin; normally computed from
116
246
  a JSON dump of the basin definition. A random string is used
117
247
  if None is specified.
248
+ perishable: bool or PerishableRecord
249
+ If this is not False, then it must be a :class:`.PerishableRecord`
250
+ that holds the information about the expiration time, and that
251
+ comes with a method `refresh` to extend the lifetime of the basin.
118
252
  kwargs:
119
253
  Additional keyword arguments passed to the `load_dataset`
120
254
  method of the `Basin` subclass.
@@ -130,13 +264,19 @@ class Basin(abc.ABC):
130
264
  self.name = name
131
265
  #: lengthy description of the basin
132
266
  self.description = description
133
- # defining key of the basin
267
+ # perishable record
268
+ if isinstance(perishable, bool) and perishable:
269
+ # Create an empty perishable record
270
+ perishable = PerishableRecord(self)
271
+ self.perishable = perishable
272
+ # define key of the basin
134
273
  self.key = key or str(uuid.uuid4())
135
274
  # features this basin provides
136
275
  self._features = features
137
276
  #: measurement identifier of the referencing dataset
138
- self.measurement_identifier = measurement_identifier
139
- self._measurement_identifier_verified = False
277
+ self.referrer_identifier = referrer_identifier
278
+ self.basin_identifier = basin_identifier or None
279
+ self._identifiers_verification = None
140
280
  #: ignored basins
141
281
  self.ignored_basins = ignored_basins or []
142
282
  #: additional keyword arguments passed to the basin
@@ -164,23 +304,28 @@ class Basin(abc.ABC):
164
304
  self._av_check.start()
165
305
 
166
306
  def __repr__(self):
307
+ try:
308
+ feature_info = len(self.features)
309
+ except BaseException:
310
+ feature_info = "unknown"
167
311
  options = [
168
312
  self.name,
169
313
  f"mapped {self.mapping}" if self.mapping != "same" else "",
170
- f"features {self._features}" if self.features else "full-featured",
314
+ f"{feature_info} features",
171
315
  f"location {self.location}",
172
316
  ]
173
317
  opt_str = ", ".join([o for o in options if o])
174
318
 
175
319
  return f"<{self.__class__.__name__} ({opt_str}) at {hex(id(self))}>"
176
320
 
177
- def _assert_measurement_identifier(self):
321
+ def _assert_referrer_identifier(self):
178
322
  """Make sure the basin matches the measurement identifier
179
323
  """
180
324
  if not self.verify_basin(run_identifier=True):
181
- raise KeyError(f"Measurement identifier of basin {self.ds} "
182
- f"({self.get_measurement_identifier()}) does "
183
- f"not match {self.measurement_identifier}!")
325
+ raise BasinIdentifierMismatchError(
326
+ f"Measurement identifier of basin {self.ds} "
327
+ f"({self.get_measurement_identifier()}) does "
328
+ f"not match {self.referrer_identifier}!")
184
329
 
185
330
  @property
186
331
  def basinmap(self):
@@ -220,6 +365,10 @@ class Basin(abc.ABC):
220
365
  @property
221
366
  def ds(self):
222
367
  """The :class:`.RTDCBase` instance represented by the basin"""
368
+ if self.perishable and self.perishable.perished():
369
+ # We have perished. Ask the PerishableRecord to refresh this
370
+ # basin so we can access it again.
371
+ self.perishable.refresh()
223
372
  if self._ds is None:
224
373
  if not self.is_available():
225
374
  raise BasinNotAvailableError(f"Basin {self} is not available!")
@@ -265,6 +414,7 @@ class Basin(abc.ABC):
265
414
  "basin_descr": self.description,
266
415
  "basin_feats": self.features,
267
416
  "basin_map": self.basinmap,
417
+ "perishable": bool(self.perishable),
268
418
  }
269
419
 
270
420
  def close(self):
@@ -275,7 +425,7 @@ class Basin(abc.ABC):
275
425
 
276
426
  def get_feature_data(self, feat):
277
427
  """Return an object representing feature data of the basin"""
278
- self._assert_measurement_identifier()
428
+ self._assert_referrer_identifier()
279
429
  return self.ds[feat]
280
430
 
281
431
  def get_measurement_identifier(self):
@@ -320,26 +470,44 @@ class Basin(abc.ABC):
320
470
  # Only check for run identifier if requested and if the availability
321
471
  # check did not fail.
322
472
  if run_identifier and check_avail:
323
- if not self._measurement_identifier_verified:
324
- if self.measurement_identifier is None:
473
+ if self._identifiers_verification is None:
474
+ # This is the measurement identifier of the basin.
475
+ basin_identifier = self.get_measurement_identifier()
476
+
477
+ # Perform a sanity check for the basin identifier.
478
+ if (self.basin_identifier
479
+ and self.basin_identifier != basin_identifier):
480
+ # We should not proceed any further with this basin.
481
+ self._identifiers_verification = False
482
+ warnings.warn(
483
+ f"Basin identifier mismatch for {self}. Expected "
484
+ f"'{self.basin_identifier}', got '{basin_identifier}'")
485
+
486
+ if self.referrer_identifier is None:
325
487
  # No measurement identifier was presented by the
326
- # referencing dataset. Don't perform any checks.
327
- self._measurement_identifier_verified = True
488
+ # referencing dataset. We are in the dark.
489
+ # Don't perform any checks.
490
+ self._identifiers_verification = True
328
491
  else:
329
- if self.mapping == "same":
330
- # When we have identical mapping, then the measurement
331
- # identifier has to match exactly.
332
- verifier = str.__eq__
492
+ if basin_identifier is None:
493
+ # Again, we are in the dark, because the basin dataset
494
+ # does not have an identifier. This is an undesirable
495
+ # situation, but there is nothing we can do about it.
496
+ self._identifiers_verification = True
333
497
  else:
334
- # When we have non-identical mapping (e.g. exported
335
- # data), then the measurement identifier has to
336
- # partially match.
337
- verifier = str.startswith
338
- self._measurement_identifier_verified = verifier(
339
- self.measurement_identifier,
340
- self.get_measurement_identifier()
341
- )
342
- check_rid = self._measurement_identifier_verified
498
+ if self.mapping == "same":
499
+ # When we have identical mapping, then the
500
+ # measurement identifier has to match exactly.
501
+ verifier = str.__eq__
502
+ else:
503
+ # When we have non-identical mapping (e.g. exported
504
+ # data), then the measurement identifier has to
505
+ # partially match.
506
+ verifier = str.startswith
507
+ self._identifiers_verification = verifier(
508
+ self.referrer_identifier, basin_identifier)
509
+
510
+ check_rid = self._identifiers_verification
343
511
  else:
344
512
  check_rid = True
345
513
 
@@ -403,8 +571,12 @@ class BasinProxy:
403
571
 
404
572
  def __getitem__(self, feat):
405
573
  if feat not in self._features:
406
- feat_obj = BasinProxyFeature(feat_obj=self.ds[feat],
407
- basinmap=self.basinmap)
574
+ if feat == "contour":
575
+ feat_obj = BasinProxyContour(feat_obj=self.ds[feat],
576
+ basinmap=self.basinmap)
577
+ else:
578
+ feat_obj = BasinProxyFeature(feat_obj=self.ds[feat],
579
+ basinmap=self.basinmap)
408
580
  self._features[feat] = feat_obj
409
581
  return self._features[feat]
410
582
 
@@ -412,14 +584,61 @@ class BasinProxy:
412
584
  return len(self.basinmap)
413
585
 
414
586
 
587
+ class BasinProxyContour:
588
+ def __init__(self, feat_obj, basinmap):
589
+ """Wrap around a contour, mapping it upon data access, no caching"""
590
+ self.feat_obj = feat_obj
591
+ self.basinmap = basinmap
592
+ self.is_scalar = False
593
+ self.shape = (len(self.basinmap), np.nan, 2)
594
+ self.identifier = feat_obj.identifier
595
+
596
+ def __getattr__(self, item):
597
+ if item in [
598
+ "dtype",
599
+ ]:
600
+ return getattr(self.feat_obj, item)
601
+ else:
602
+ raise AttributeError(
603
+ f"BasinProxyContour does not implement {item}")
604
+
605
+ def __getitem__(self, index):
606
+ if isinstance(index, numbers.Integral):
607
+ # single index, cheap operation
608
+ return self.feat_obj[self.basinmap[index]]
609
+ else:
610
+ raise NotImplementedError(
611
+ "Cannot index contours without anything else than integers.")
612
+
613
+ def __len__(self):
614
+ return self.shape[0]
615
+
616
+
415
617
  class BasinProxyFeature(np.lib.mixins.NDArrayOperatorsMixin):
416
618
  def __init__(self, feat_obj, basinmap):
417
619
  """Wrap around a feature object, mapping it upon data access"""
418
620
  self.feat_obj = feat_obj
419
621
  self.basinmap = basinmap
420
622
  self._cache = None
623
+ self._shape = None
624
+ self._size = None
421
625
  self.is_scalar = bool(len(self.feat_obj.shape) == 1)
422
626
 
627
+ @property
628
+ def shape(self):
629
+ if self._shape is None:
630
+ if self.is_scalar:
631
+ self._shape = self.basinmap.shape
632
+ else:
633
+ self._shape = (self.basinmap.size,) + self.feat_obj.shape[1:]
634
+ return self._shape
635
+
636
+ @property
637
+ def size(self):
638
+ if self._size is None:
639
+ self._size = np.prod(self.shape)
640
+ return self._size
641
+
423
642
  def __array__(self, dtype=None, copy=copy_if_needed, *args, **kwargs):
424
643
  if self._cache is None and self.is_scalar:
425
644
  self._cache = self.feat_obj[:][self.basinmap]
@@ -436,8 +655,6 @@ class BasinProxyFeature(np.lib.mixins.NDArrayOperatorsMixin):
436
655
  def __getattr__(self, item):
437
656
  if item in [
438
657
  "dtype",
439
- "shape",
440
- "size",
441
658
  ]:
442
659
  return getattr(self.feat_obj, item)
443
660
  else:
@@ -11,7 +11,9 @@ class DCORAccessError(BaseException):
11
11
 
12
12
  class APIHandler:
13
13
  """Handles the DCOR api with caching for simple queries"""
14
- #: these are cached to minimize network usage
14
+ #: These are cached to minimize network usage
15
+ #: Note that we are not caching basins, since they may contain
16
+ #: expiring URLs.
15
17
  cache_queries = ["metadata", "size", "feature_list", "valid"]
16
18
  #: DCOR API Keys/Tokens in the current session
17
19
  api_keys = []
@@ -52,8 +54,36 @@ class APIHandler:
52
54
  if api_key.strip() and api_key not in APIHandler.api_keys:
53
55
  APIHandler.api_keys.append(api_key)
54
56
 
55
- def _get(self, query, feat=None, trace=None, event=None, api_key="",
56
- retries=13):
57
+ def _get(self,
58
+ query: str,
59
+ feat: str = None,
60
+ trace: str = None,
61
+ event: str = None,
62
+ api_key: str = "",
63
+ timeout: float = None,
64
+ retries: int = 5):
65
+ """Fetch information via the DCOR API
66
+
67
+ Parameters
68
+ ----------
69
+ query: str
70
+ API route
71
+ feat: str
72
+ DEPRECATED (use basins instead), adds f"&feature={feat}" to query
73
+ trace: str
74
+ DEPRECATED (use basins instead), adds f"&trace={trace}" to query
75
+ event: str
76
+ DEPRECATED (use basins instead), adds f"&event={event}" to query
77
+ api_key: str
78
+ DCOR API token to use
79
+ timeout: float
80
+ Request timeout
81
+ retries: int
82
+ Number of retries to fetch the request. For every retry, the
83
+ timeout is increased by two seconds.
84
+ """
85
+ if timeout is None:
86
+ timeout = 1
57
87
  # "version=2" introduced in dclab 0.54.3
58
88
  # (supported since ckanext.dc_serve 0.13.2)
59
89
  qstr = f"&version={self.dcserv_api_version}&query={query}"
@@ -65,13 +95,13 @@ class APIHandler:
65
95
  qstr += f"&event={event}"
66
96
  apicall = self.url + qstr
67
97
  fail_reasons = []
68
- for _ in range(retries):
98
+ for ii in range(retries):
69
99
  try:
70
100
  # try-except both requests and json conversion
71
101
  req = self.session.get(apicall,
72
102
  headers={"Authorization": api_key},
73
103
  verify=self.verify,
74
- timeout=1,
104
+ timeout=timeout + ii * 2,
75
105
  )
76
106
  jreq = req.json()
77
107
  except requests.urllib3.exceptions.ConnectionError: # requests
@@ -92,13 +122,45 @@ class APIHandler:
92
122
  f"Messages: {fail_reasons}")
93
123
  return jreq
94
124
 
95
- def get(self, query, feat=None, trace=None, event=None):
125
+ def get(self,
126
+ query: str,
127
+ feat: str = None,
128
+ trace: str = None,
129
+ event: str = None,
130
+ timeout: float = None,
131
+ retries: int = 5,
132
+ ):
133
+ """Fetch information from DCOR
134
+
135
+ Parameters
136
+ ----------
137
+ query: str
138
+ API route
139
+ feat: str
140
+ DEPRECATED (use basins instead), adds f"&feature={feat}" to query
141
+ trace: str
142
+ DEPRECATED (use basins instead), adds f"&trace={trace}" to query
143
+ event: str
144
+ DEPRECATED (use basins instead), adds f"&event={event}" to query
145
+ timeout: float
146
+ Request timeout
147
+ retries: int
148
+ Number of retries to fetch the request. For every retry, the
149
+ timeout is increased by two seconds.
150
+ """
96
151
  if query in APIHandler.cache_queries and query in self._cache:
97
152
  result = self._cache[query]
98
153
  else:
99
154
  req = {"error": {"message": "No access to API (api key?)"}}
100
155
  for api_key in [self.api_key] + APIHandler.api_keys:
101
- req = self._get(query, feat, trace, event, api_key)
156
+ req = self._get(query=query,
157
+ feat=feat,
158
+ trace=trace,
159
+ event=event,
160
+ api_key=api_key,
161
+ timeout=timeout,
162
+ retries=retries,
163
+ )
102
164
  if req["success"]:
103
165
  self.api_key = api_key # remember working key
104
166
  break
@@ -1,17 +1,23 @@
1
1
  """DCOR client interface"""
2
+ import logging
2
3
  import pathlib
3
4
  import re
5
+ import time
4
6
 
5
7
  from ...util import hashobj
6
8
 
7
9
  from ..config import Configuration
8
10
  from ..core import RTDCBase
11
+ from ..feat_basin import PerishableRecord
9
12
 
10
13
  from . import api
11
14
  from .logs import DCORLogs
12
15
  from .tables import DCORTables
13
16
 
14
17
 
18
+ logger = logging.getLogger(__name__)
19
+
20
+
15
21
  #: Append directories here where dclab should look for certificate bundles
16
22
  #: for a specific host. The directory should contain files named after the
17
23
  #: hostname, e.g. "dcor.mpl.mpg.de.cert".
@@ -73,6 +79,8 @@ class RTDC_DCOR(RTDCBase):
73
79
  super(RTDC_DCOR, self).__init__(*args, **kwargs)
74
80
 
75
81
  self._hash = None
82
+ self._cache_basin_dict = None
83
+ self.cache_basin_dict_time = 600
76
84
  self.path = RTDC_DCOR.get_full_url(url, use_ssl, host)
77
85
 
78
86
  if cert_path is None:
@@ -161,15 +169,106 @@ class RTDC_DCOR(RTDCBase):
161
169
  new_url = f"{scheme}://{netloc}/{api_path}"
162
170
  return new_url
163
171
 
164
- def basins_get_dicts(self):
165
- """Return list of dicts for all basins defined in `self.h5file`"""
172
+ def _basin_refresh(self, basin):
173
+ """Refresh the specified basin"""
174
+ # Retrieve the basin dictionary from DCOR
175
+ basin_dicts = self.basins_get_dicts()
176
+ for bn_dict in basin_dicts:
177
+ if bn_dict.get("name") == basin.name:
178
+ break
179
+ else:
180
+ raise ValueError(f"Basin '{basin.name}' not found in {self}")
181
+
182
+ tre = bn_dict["time_request"]
183
+ ttl = bn_dict["time_expiration"]
184
+ # remember time relative to time.time, subtract 30s to be on safe side
185
+ tex = bn_dict["time_local_request"] + (ttl - tre) - 30
186
+
187
+ if isinstance(basin.perishable, bool):
188
+ logger.debug("Initializing basin perishable %s", basin.name)
189
+ # create a perishable record
190
+ basin.perishable = PerishableRecord(
191
+ basin=basin,
192
+ expiration_func=self._basin_expiration,
193
+ expiration_kwargs={"time_local_expiration": tex},
194
+ refresh_func=self._basin_refresh,
195
+ )
196
+ else:
197
+ logger.debug("Refreshing basin perishable %s", basin.name)
198
+ # only update (this also works with weakref.ProxyType)
199
+ basin.perishable.expiration_kwargs = {"time_local_expiration": tex}
200
+
201
+ if len(bn_dict["urls"]) > 1:
202
+ logger.warning(f"Basin {basin.name} has multiple URLs. I am not "
203
+ f"checking their availability: {bn_dict}")
204
+ basin.location = bn_dict["urls"][0]
205
+
206
+ def _basin_expiration(self, basin, time_local_expiration):
207
+ """Check whether the basin has perished"""
208
+ return time_local_expiration < time.time()
209
+
210
+ def _basins_get_dicts(self):
166
211
  try:
167
- basins = self.api.get(query="basins")
212
+ basin_dicts = self.api.get(query="basins")
213
+ # Fill in missing timing information
214
+ for bn_dict in basin_dicts:
215
+ if (bn_dict.get("format") == "http"
216
+ and "perishable" not in bn_dict):
217
+ # We are communicating with an older version of
218
+ # ckanext-dc_serve. Take a look at the URL and check
219
+ # whether we have a perishable (~1 hour) URL or whether
220
+ # this is a public resource.
221
+ expires_regexp = re.compile(".*expires=([0-9]*)$")
222
+ for url in bn_dict.get("urls", []):
223
+ if match := expires_regexp.match(url.lower()):
224
+ logger.debug("Detected perishable basin: %s",
225
+ bn_dict["name"])
226
+ bn_dict["perishable"] = True
227
+ bn_dict["time_request"] = time.time()
228
+ bn_dict["time_expiration"] = int(match.group(1))
229
+ # add part of the resource ID to the name
230
+ infourl = url.split(bn_dict["name"], 1)[-1]
231
+ infourl = infourl.replace("/", "")
232
+ bn_dict["name"] += f"-{infourl[:5]}"
233
+ break
234
+ else:
235
+ bn_dict["perishable"] = False
236
+ # If we have a perishable basin, add the local request time
237
+ if bn_dict.get("perishable"):
238
+ bn_dict["time_local_request"] = time.time()
168
239
  except api.DCORAccessError:
169
240
  # TODO: Do not catch this exception when all DCOR instances
170
241
  # implement the 'basins' query.
171
242
  # This means that the server does not implement the 'basins' query.
172
- basins = []
243
+ basin_dicts = []
244
+ return basin_dicts
245
+
246
+ def basins_get_dicts(self):
247
+ """Return list of dicts for all basins defined on DCOR
248
+
249
+ The return value of this method is cached for 10 minutes
250
+ (cache time defined in the `cache_basin_dict_time` [s] property).
251
+ """
252
+ if (self._cache_basin_dict is None
253
+ or time.time() > (self._cache_basin_dict[1]
254
+ + self.cache_basin_dict_time)):
255
+ self._cache_basin_dict = (self._basins_get_dicts(), time.time())
256
+ return self._cache_basin_dict[0]
257
+
258
+ def basins_retrieve(self):
259
+ """Same as superclass, but add perishable information"""
260
+ basin_dicts = self.basins_get_dicts()
261
+ basins = super(RTDC_DCOR, self).basins_retrieve()
262
+ for bn in basins:
263
+ for bn_dict in basin_dicts:
264
+ if bn.name == bn_dict.get("name"):
265
+ # Determine whether we have to set a perishable record.
266
+ if bn_dict.get("perishable"):
267
+ # required for `_basin_refresh` to create a record
268
+ bn.perishable = True
269
+ # create the actual record
270
+ self._basin_refresh(bn)
271
+ break
173
272
  return basins
174
273
 
175
274