dclab 0.62.11__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic. Click here for more details.

Files changed (137) hide show
  1. dclab/__init__.py +23 -0
  2. dclab/_version.py +16 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +183 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cp313-win_amd64.pyd +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cp313-win_amd64.pyd +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cp313-win_amd64.pyd +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cp313-win_amd64.pyd +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +256 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde_contours.py +222 -0
  73. dclab/kde_methods.py +303 -0
  74. dclab/lme4/__init__.py +5 -0
  75. dclab/lme4/lme4_template.R +94 -0
  76. dclab/lme4/rsetup.py +204 -0
  77. dclab/lme4/wrapr.py +386 -0
  78. dclab/polygon_filter.py +398 -0
  79. dclab/rtdc_dataset/__init__.py +15 -0
  80. dclab/rtdc_dataset/check.py +902 -0
  81. dclab/rtdc_dataset/config.py +533 -0
  82. dclab/rtdc_dataset/copier.py +353 -0
  83. dclab/rtdc_dataset/core.py +1001 -0
  84. dclab/rtdc_dataset/export.py +737 -0
  85. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  86. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  87. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  88. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  89. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  91. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  92. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  93. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  94. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  95. dclab/rtdc_dataset/feat_basin.py +550 -0
  96. dclab/rtdc_dataset/feat_temp.py +102 -0
  97. dclab/rtdc_dataset/filter.py +263 -0
  98. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  99. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  100. dclab/rtdc_dataset/fmt_dcor/api.py +111 -0
  101. dclab/rtdc_dataset/fmt_dcor/base.py +200 -0
  102. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  103. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  104. dclab/rtdc_dataset/fmt_dcor/tables.py +42 -0
  105. dclab/rtdc_dataset/fmt_dict.py +103 -0
  106. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  107. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  108. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  109. dclab/rtdc_dataset/fmt_hdf5/events.py +257 -0
  110. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  111. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  112. dclab/rtdc_dataset/fmt_hdf5/tables.py +30 -0
  113. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  114. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  115. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  116. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  118. dclab/rtdc_dataset/fmt_http.py +102 -0
  119. dclab/rtdc_dataset/fmt_s3.py +320 -0
  120. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  121. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  122. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  123. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  124. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  125. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  126. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  127. dclab/rtdc_dataset/load.py +72 -0
  128. dclab/rtdc_dataset/writer.py +985 -0
  129. dclab/statistics.py +203 -0
  130. dclab/util.py +156 -0
  131. dclab/warn.py +15 -0
  132. dclab-0.62.11.dist-info/LICENSE +343 -0
  133. dclab-0.62.11.dist-info/METADATA +146 -0
  134. dclab-0.62.11.dist-info/RECORD +137 -0
  135. dclab-0.62.11.dist-info/WHEEL +5 -0
  136. dclab-0.62.11.dist-info/entry_points.txt +8 -0
  137. dclab-0.62.11.dist-info/top_level.txt +1 -0
@@ -0,0 +1,263 @@
1
+ """RT-DC dataset core classes and methods"""
2
+
3
+ import warnings
4
+
5
+ import numpy as np
6
+
7
+ from dclab import definitions as dfn
8
+
9
+ from .. import downsampling
10
+ from ..polygon_filter import PolygonFilter
11
+
12
+
13
class NanWarning(UserWarning):
    """Warning issued when a box-filtered feature contains nan values"""
15
+
16
+
17
class Filter(object):
    def __init__(self, rtdc_ds):
        """Boolean filter arrays for RT-DC measurements

        Parameters
        ----------
        rtdc_ds: instance of RTDCBase
            The RT-DC dataset the filter applies to
        """
        # feature name -> boolean array of the corresponding box filter
        self._box_filters = {}
        # polygon filter ID -> (hash, boolean array)
        self._poly_filters = {}
        # name -> boolean array backing the read-only properties
        # (`all`, `box`, `polygon`, `invalid`)
        self._array_props = {}
        # set `self.features` and `self.size` from the dataset
        self._init_rtdc_ds(rtdc_ds)
        # start from a pristine filter state
        self.reset()

    def __getitem__(self, key):
        """Return the box filter array for a feature in `self.features`

        The array is created on first access (all events pass).

        Raises
        ------
        KeyError
            If `key` is not in `self.features` or is not a known
            scalar feature.
        """
        if not (key in self.features and dfn.scalar_feature_exists(key)):
            raise KeyError("Feature not available: '{}'".format(key))
        if key not in self._box_filters:
            # lazily create the filter array
            self._box_filters[key] = np.ones(self.size, dtype=bool)
        return self._box_filters[key]

    @property
    def all(self):
        """All filters combined (see :func:`Filter.update`)

        Use this property to filter the features of
        :class:`dclab.rtdc_dataset.RTDCBase` instances
        """
        return self._get_ro_array("all")

    @property
    def box(self):
        """All box filters combined (read-only)"""
        return self._get_ro_array("box")

    @property
    def polygon(self):
        """All polygon filters combined (read-only)"""
        return self._get_ro_array("polygon")

    @property
    def invalid(self):
        """Filter for invalid (nan/inf) events (read-only)"""
        return self._get_ro_array("invalid")

    def _get_ro_array(self, key):
        """Return a non-writeable view of the internal array `key`"""
        ro_view = self._get_rw_array(key).view()
        ro_view.flags.writeable = False
        return ro_view

    def _get_rw_array(self, key):
        """Return the writeable internal array `key`, creating it if needed"""
        try:
            return self._array_props[key]
        except KeyError:
            arr = np.ones(self.size, dtype=bool)
            self._array_props[key] = arr
            return arr

    def _init_rtdc_ds(self, rtdc_ds):
        """Synchronize features, size, and stored filters with `rtdc_ds`

        Raises
        ------
        ValueError
            If the length of `rtdc_ds` differs from a previously
            stored `self.size`.
        """
        #: Available feature names
        self.features = rtdc_ds.features_scalar
        if hasattr(self, "size") and self.size != len(rtdc_ds):
            raise ValueError("Change of RTDCBase size not supported!")
        self.size = len(rtdc_ds)
        # drop box filters whose feature is no longer available
        stale_feats = [feat for feat in self._box_filters
                       if feat not in self.features]
        for feat in stale_feats:
            del self._box_filters[feat]
        # drop polygon filters that were removed from the configuration
        # or whose axes are no longer available
        for pf_id in list(self._poly_filters):
            pf = PolygonFilter.get_instance_from_id(pf_id)
            still_valid = (
                pf_id in rtdc_ds.config["filtering"]["polygon filters"]
                and pf.axes[0] in self.features
                and pf.axes[1] in self.features)
            if not still_valid:
                self._poly_filters.pop(pf_id)

    def reset(self):
        """Reset all filters"""
        for store in (self._box_filters,
                      self._poly_filters,
                      self._array_props):
            store.clear()
        #: 1D boolean array for manually excluding events; `False` values
        #: are excluded.
        self.manual = np.ones(self.size, dtype=bool)
        # filter configuration seen during the previous `update` call
        self._old_config = {}

    def update(self, rtdc_ds, force=None):
        """Update the filters according to `rtdc_ds.config["filtering"]`

        Parameters
        ----------
        rtdc_ds: dclab.rtdc_dataset.core.RTDCBase
            The measurement to which the filter is applied
        force : list
            A list of feature names that must be refiltered with
            min/max values.

        Raises
        ------
        ValueError
            If `force` contains an unknown scalar feature name or if
            only one of the " min"/" max" keys is set for a feature.

        Notes
        -----
        This function is called when
        :func:`ds.apply_filter <dclab.rtdc_dataset.RTDCBase.apply_filter>`
        is called.
        """
        forced = [] if force is None else force
        # re-initialize important parameters
        self._init_rtdc_ds(rtdc_ds)

        cfg_cur = rtdc_ds.config["filtering"]
        cfg_old = self._old_config

        # configuration keys whose value changed since the last update
        changed_keys = [skey for skey in list(cfg_cur.keys())
                        if cfg_cur[skey] != cfg_old.get(skey, None)]

        # 1. Invalid filters
        arr_invalid = self._get_rw_array("invalid")
        arr_invalid[:] = True
        if cfg_cur["remove invalid events"]:
            for feat in self.features:
                data = rtdc_ds[feat]
                arr_invalid &= ~(np.isinf(data) | np.isnan(data))

        # 2. Filter all feature min/max values.
        feat2filter = []
        for ckey in changed_keys:
            # crop the trailing " min" / " max" to get the feature name
            candidate = ckey[:-4]
            if (dfn.scalar_feature_exists(candidate)
                    and ckey.endswith((" min", " max"))):
                feat2filter.append(candidate)

        for ffeat in forced:
            # forced features must be valid scalar features
            if not dfn.scalar_feature_exists(ffeat):
                raise ValueError(
                    "Unknown scalar feature name '{}'!".format(ffeat))
            feat2filter.append(ffeat)

        feat2filter = np.unique(feat2filter)

        for feat in feat2filter:
            fstart = feat + " min"
            fend = feat + " max"
            has_start = fstart in cfg_cur
            has_end = fend in cfg_cur
            must_be_filtered = (has_start
                                and has_end
                                and cfg_cur[fstart] != cfg_cur[fend])
            if has_start != has_end:
                # User is responsible for setting min and max values!
                raise ValueError("Box filter: Please make sure that both "
                                 "'{}' and '{}' are set!".format(fstart, fend))
            if feat in self.features:
                # fetch the feature filter and start from "all pass"
                feat_filt = self[feat]
                feat_filt[:] = True
                if must_be_filtered:
                    lower = cfg_cur[fstart]
                    upper = cfg_cur[fend]
                    if lower > upper:
                        msg = "inverting filter: {} > {}".format(fstart, fend)
                        warnings.warn(msg)
                        lower, upper = upper, lower
                    data = rtdc_ds[feat]
                    # nan-values never pass a box filter; excluding them
                    # from the comparison avoids RuntimeWarnings
                    disnan = np.isnan(data)
                    if np.sum(disnan):
                        feat_filt[disnan] = False
                        idx = ~disnan
                        if not cfg_cur["remove invalid events"]:
                            msg = "Feature '{}' contains ".format(feat) \
                                  + "nan-values! Box filters remove those."
                            warnings.warn(msg, NanWarning)
                    else:
                        idx = slice(0, self.size)  # place-holder for [:]
                    feat_filt[idx] &= lower <= data[idx]
                    feat_filt[idx] &= data[idx] <= upper
            elif must_be_filtered:
                warnings.warn("Dataset '{}' does ".format(rtdc_ds.identifier)
                              + "not contain the feature '{}'! ".format(feat)
                              + "A box filter has been ignored.")
        # combine all box filters
        arr_box = self._get_rw_array("box")
        arr_box[:] = True
        for box_arr in self._box_filters.values():
            arr_box &= box_arr

        # 3. Filter with polygon filters
        # (re)compute only new filters or filters whose hash changed
        for pf_id in cfg_cur["polygon filters"]:
            pf = PolygonFilter.get_instance_from_id(pf_id)
            if (pf_id not in self._poly_filters
                    or pf.hash != self._poly_filters[pf_id][0]):
                datax = rtdc_ds[pf.axes[0]]
                datay = rtdc_ds[pf.axes[1]]
                self._poly_filters[pf_id] = (pf.hash, pf.filter(datax, datay))
        # combine all polygon filters
        arr_polygon = self._get_rw_array("polygon")
        arr_polygon[:] = True
        for _, poly_arr in self._poly_filters.values():
            arr_polygon &= poly_arr

        # 4. Finally combine all filters and apply "limit events"
        arr_all = self._get_rw_array("all")
        if cfg_cur["enable filters"]:
            arr_all[:] = arr_box & arr_invalid & arr_polygon & self.manual

            # The configuration key "limit events" limits the total
            # number of events that pass in `self.all`.
            if cfg_cur["limit events"] > 0:
                limit = cfg_cur["limit events"]
                # boolean indexing returns a copy of the passing events
                sub = arr_all[arr_all]
                _, keep = downsampling.downsample_rand(sub,
                                                       samples=limit,
                                                       ret_idx=True)
                sub[~keep] = False
                arr_all[arr_all] = sub
        else:
            arr_all[:] = True

        # Actual filtering is then done during plotting
        self._old_config = rtdc_ds.config.copy()["filtering"]
@@ -0,0 +1,7 @@
1
+ # flake8: noqa: F401
2
+ """DCOR client interface"""
3
+ from .api import REQUESTS_AVAILABLE
4
+ from .base import (
5
+ DCOR_CERTS_SEARCH_PATHS, RTDC_DCOR, get_server_cert_path, is_dcor_url
6
+ )
7
+ from .basin import DCORBasin
@@ -0,0 +1,52 @@
1
+ """DCOR-med access token (SSL certificate + CKAN token)"""
2
+ import pathlib
3
+ import ssl
4
+ import tempfile
5
+ import zipfile
6
+
7
+
8
def get_api_key(access_token_path, password):
    """Extract the API key / API token from an encrypted DCOR access token

    Parameters
    ----------
    access_token_path: str or pathlib.Path
        Path to the access token (a zip archive)
    password: str or bytes
        Password of the archive; strings are encoded as UTF-8

    Returns
    -------
    api_key: str
        Decoded content of "api_key.txt" in the archive with leading
        and trailing whitespace removed
    """
    pwd = password.encode("utf-8") if isinstance(password, str) else password
    with zipfile.ZipFile(access_token_path) as arc:
        raw_key = arc.read("api_key.txt", pwd=pwd)
    return raw_key.decode().strip()
15
+
16
+
17
def get_certificate(access_token_path, password):
    """Extract the certificate bundle from an encrypted DCOR access token

    Parameters
    ----------
    access_token_path: str or pathlib.Path
        Path to the access token (a zip archive)
    password: str or bytes
        Password of the archive; strings are encoded as UTF-8

    Returns
    -------
    cert_data: bytes
        Raw content of "server.cert" in the archive
    """
    pwd = password.encode("utf-8") if isinstance(password, str) else password
    with zipfile.ZipFile(access_token_path) as arc:
        return arc.read("server.cert", pwd=pwd)
24
+
25
+
26
def get_hostname(access_token_path, password):
    """Extract the hostname from an encrypted DCOR access token

    The hostname is the "commonName" entry in the subject of the
    certificate stored in the access token.

    Raises
    ------
    KeyError
        If the certificate subject has no "commonName" entry
    """
    cert_data = get_certificate(access_token_path, password)
    with tempfile.TemporaryDirectory(prefix="dcoraid_access_token_") as td:
        cert_file = pathlib.Path(td) / "server.cert"
        cert_file.write_bytes(cert_data)
        # NOTE: This deliberately uses a *private* function of the
        # built-in ssl module to avoid additional dependencies. The
        # original author (Paul) already flagged this as a potential
        # source of trouble; if it breaks, the functionality has to
        # come from somewhere else (e.g. the requests library).
        cert_dict = ssl._ssl._test_decode_cert(str(cert_file))
    # the subject is iterated as one-element tuples of (key, value) pairs
    for ((key, value),) in cert_dict["subject"]:
        if key == "commonName":
            return value.strip()
    raise KeyError("Could not extract hostname from certificate!")
@@ -0,0 +1,111 @@
1
+ import json
2
+ import time
3
+
4
+ from ...http_utils import REQUESTS_AVAILABLE # noqa: F401
5
+ from ...http_utils import requests, session_cache
6
+
7
+
8
class DCORAccessError(BaseException):
    """Raised when a DCOR API query cannot be completed or is denied"""
    # NOTE(review): this derives from `BaseException`, so it is not caught
    # by `except Exception` handlers - confirm that this is intended.
10
+
11
+
12
class APIHandler:
    """Handles the DCOR api with caching for simple queries"""
    #: these are cached to minimize network usage
    cache_queries = ["metadata", "size", "feature_list", "valid"]
    #: DCOR API Keys/Tokens in the current session
    api_keys = []

    def __init__(self, url, api_key="", cert_path=None, dcserv_api_version=2):
        """

        Parameters
        ----------
        url: str
            URL to DCOR API
        api_key: str
            DCOR API token
        cert_path: pathlib.Path
            the path to the server's CA bundle; by default this
            will use the default certificates (which depends on
            from where you obtained certifi/requests)
        dcserv_api_version: int
            value of the "version" parameter sent with every query
        """
        #: DCOR API URL
        self.url = url
        #: keyword argument to :func:`requests.request`
        self.verify = cert_path or True
        #: DCOR API Token
        self.api_key = api_key
        #: ckanext-dc_serve dcserv API version
        self.dcserv_api_version = dcserv_api_version
        #: session obtained from the session cache for `url`
        self.session = session_cache.get_session(url)
        # results of the queries listed in `cache_queries`
        self._cache = {}

    @classmethod
    def add_api_key(cls, api_key):
        """Add an API Key/Token to the base class

        When accessing the DCOR API, all available API Keys/Tokens are
        used to access a resource (trial and error). Whitespace-only
        and already-registered keys are ignored.
        """
        is_blank = not api_key.strip()
        if is_blank or api_key in APIHandler.api_keys:
            return
        APIHandler.api_keys.append(api_key)

    def _get(self, query, feat=None, trace=None, event=None, api_key="",
             retries=13):
        """Perform one API query (with retries) and return the parsed JSON

        Raises
        ------
        DCORAccessError
            If none of the `retries` attempts yielded a JSON response
        """
        # "version=2" introduced in dclab 0.54.3
        # (supported since ckanext.dc_serve 0.13.2)
        pieces = [f"&version={self.dcserv_api_version}", f"&query={query}"]
        optional = (("feature", feat), ("trace", trace), ("event", event))
        for name, value in optional:
            if value is not None:
                pieces.append(f"&{name}={value}")
        apicall = self.url + "".join(pieces)
        fail_reasons = []
        # NOTE(review): requests usually wraps urllib3 errors in its own
        # `requests.exceptions.*` classes - confirm that the urllib3
        # exception classes below are what the session actually raises.
        for _ in range(retries):
            try:
                # try-except both requests and json conversion
                req = self.session.get(apicall,
                                       headers={"Authorization": api_key},
                                       verify=self.verify,
                                       timeout=1,
                                       )
                jreq = req.json()
            except requests.urllib3.exceptions.ConnectionError:  # requests
                fail_reasons.append("connection problem")
            except (requests.urllib3.exceptions.ReadTimeoutError,
                    requests.exceptions.ConnectTimeout):  # requests
                fail_reasons.append("timeout")
            except json.decoder.JSONDecodeError:  # json
                fail_reasons.append("invalid json")
                time.sleep(1)  # wait a bit, maybe the server is overloaded
            else:
                break
        else:
            raise DCORAccessError(f"Could not complete query '{apicall}'. "
                                  f"I retried {retries} times. "
                                  f"Messages: {fail_reasons}")
        return jreq

    def get(self, query, feat=None, trace=None, event=None):
        """Return the result of an API query, trying all known API keys

        Results of queries listed in `cache_queries` are cached per
        instance. The instance key is tried first, then the keys in
        `APIHandler.api_keys`; the first working key is remembered.

        Raises
        ------
        DCORAccessError
            If the query did not succeed with any of the API keys
        """
        cacheable = query in APIHandler.cache_queries
        if cacheable and query in self._cache:
            return self._cache[query]

        req = {"error": {"message": "No access to API (api key?)"}}
        for candidate in [self.api_key] + APIHandler.api_keys:
            req = self._get(query, feat, trace, event, candidate)
            if req["success"]:
                self.api_key = candidate  # remember working key
                break
        else:
            raise DCORAccessError(
                f"Cannot access {query}: {req['error']['message']}")
        result = req["result"]
        if cacheable:
            self._cache[query] = result
        return result
@@ -0,0 +1,200 @@
1
+ """DCOR client interface"""
2
+ import pathlib
3
+ import re
4
+
5
+ from ...util import hashobj
6
+
7
+ from ..config import Configuration
8
+ from ..core import RTDCBase
9
+
10
+ from . import api
11
+ from .logs import DCORLogs
12
+ from .tables import DCORTables
13
+
14
+
15
#: Directories in which dclab looks for host-specific certificate
#: bundles. Append to this list; each directory should contain files
#: named after the hostname, e.g. "dcor.mpl.mpg.de.cert".
DCOR_CERTS_SEARCH_PATHS = []

#: Regular expression for matching a DCOR resource URL: an optional
#: scheme, an optional host with the dcserv API path, and a resource ID
REGEXP_DCOR_URL = re.compile(
    r"^(https?:\/\/)?"  # scheme
    r"([a-z0-9-\.]*\/?api\/3\/action\/dcserv\?id=)?"  # host with API
    r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$")  # id
25
+
26
+
27
class RTDC_DCOR(RTDCBase):
    def __init__(self, url, host="dcor.mpl.mpg.de", api_key="",
                 use_ssl=None, cert_path=None, dcserv_api_version=2,
                 *args, **kwargs):
        """Wrap around the DCOR API

        Parameters
        ----------
        url: str
            Full URL or resource identifier; valid values are

            - `<https://dcor.mpl.mpg.de/api/3/action/dcserv?id=
              b1404eb5-f661-4920-be79-5ff4e85915d5>`_
            - dcor.mpl.mpg.de/api/3/action/dcserv?id=b1404eb5-f
              661-4920-be79-5ff4e85915d5
            - b1404eb5-f661-4920-be79-5ff4e85915d5
        host: str
            The default host machine used if the host is not given in `url`
        api_key: str
            API key to access private resources
        use_ssl: bool
            Set this to False to disable SSL (should only be used for
            testing). Defaults to None (does not force SSL if the URL
            starts with "http://").
        cert_path: pathlib.Path
            The (optional) path to a server CA bundle; this should only
            be necessary for DCOR instances in the intranet with a custom
            CA or for certificate pinning.
        dcserv_api_version: int
            Version of the dcserv API to use. In version 0.13.2 of
            ckanext-dc_serve, version 2 was introduced which entails
            serving an S3-basin-only dataset.
        *args:
            Arguments for `RTDCBase`
        **kwargs:
            Keyword arguments for `RTDCBase`

        Attributes
        ----------
        path: str
            Full URL to the DCOR resource
        """
        if not api.REQUESTS_AVAILABLE:
            raise ModuleNotFoundError(
                "Package `requests` required for DCOR format!")

        super().__init__(*args, **kwargs)

        self._hash = None
        self.path = RTDC_DCOR.get_full_url(url, use_ssl, host)

        # fall back to a certificate bundle looked up via the host name
        if cert_path is None:
            cert_path = get_server_cert_path(get_host_from_url(self.path))

        self.api = api.APIHandler(url=self.path,
                                  api_key=api_key,
                                  cert_path=cert_path,
                                  dcserv_api_version=dcserv_api_version)

        # Parse configuration
        self.config = Configuration(cfg=self.api.get(query="metadata"))

        # Lazy logs
        self.logs = DCORLogs(self.api)

        # Lazy tables
        self.tables = DCORTables(self.api)

        # Prefer the event count from the metadata; otherwise ask the API
        experiment = self.config["experiment"]
        size = experiment.get("event count")
        if size is None:
            size = int(self.api.get(query="size"))
        self._size = size

        self.title = f"{self.config['experiment']['sample']} - " \
                     f"M{self.config['experiment']['run index']}"

    def __len__(self):
        """Number of events in the dataset"""
        return self._size

    @property
    def hash(self):
        """Hash value based on the full resource URL (`self.path`)"""
        if self._hash is None:
            self._hash = hashobj([self.path])
        return self._hash

    @staticmethod
    def get_full_url(url, use_ssl, host=None):
        """Return the full URL to a DCOR resource

        Parameters
        ----------
        url: str
            Full URL or resource identifier; valid values are

            - https://dcor.mpl.mpg.de/api/3/action/dcserv?id=caab96f6-
              df12-4299-aa2e-089e390aafd5
            - dcor.mpl.mpg.de/api/3/action/dcserv?id=caab96f6-df12-
              4299-aa2e-089e390aafd5
            - caab96f6-df12-4299-aa2e-089e390aafd5
        use_ssl: bool or None
            Set this to False to disable SSL (should only be used for
            testing). Defaults to None (does not force SSL if the URL
            starts with "http://").
        host: str
            Use this host if it is not specified in `url`
        """
        if use_ssl is None:
            # only stick to plain http if the user explicitly asked for it
            scheme = "http" if url.startswith("http://") else "https"
        else:
            scheme = "https" if use_ssl else "http"

        # strip the scheme (if any)
        _, scheme_sep, remainder = url.partition("://")
        base = remainder if scheme_sep else url

        # determine the api_path and the netloc
        head, slash, tail = base.partition("/")
        if slash:
            netloc, api_path = head, tail
        else:
            # bare resource identifier: default to `host`
            netloc = None
            api_path = "api/3/action/dcserv?id=" + base

        # remove https from host string (user convenience)
        if host is not None:
            host = host.split("://")[-1]

        if netloc is None:
            netloc = host
        return f"{scheme}://{netloc}/{api_path}"

    def basins_get_dicts(self):
        """Return list of dicts for all basins reported by the DCOR API

        An empty list is returned if the "basins" query fails with
        :class:`DCORAccessError`.
        """
        try:
            return self.api.get(query="basins")
        except api.DCORAccessError:
            # TODO: Do not catch this exception when all DCOR instances
            #  implement the 'basins' query.
            # This means that the server does not implement the 'basins'
            # query.
            return []
174
+
175
+
176
def get_host_from_url(url):
    """Extract the hostname from a URL

    Parameters
    ----------
    url: str
        URL with or without a scheme, e.g.
        "https://dcor.mpl.mpg.de/api/3/action/dcserv?id=..." or
        "dcor.mpl.mpg.de/api/3/action/dcserv?id=..."

    Returns
    -------
    host: str
        Everything between the scheme separator "://" (if present)
        and the first "/" that follows
    """
    parts = url.split("://")
    # Without a scheme there is only one part; previously this case
    # raised an IndexError.
    remainder = parts[1] if len(parts) > 1 else parts[0]
    return remainder.split("/")[0]
179
+
180
+
181
def get_server_cert_path(host):
    """Return server certificate bundle for DCOR `host`

    The directories in `DCOR_CERTS_SEARCH_PATHS` are searched in order
    for a file named "<host>.cert". If none exists, the default
    certificate bundle reported by `requests` is returned.
    """
    cert_name = f"{host}.cert"
    for search_dir in DCOR_CERTS_SEARCH_PATHS:
        candidate = pathlib.Path(search_dir) / cert_name
        if candidate.exists():
            return candidate
    # use default certificate bundle
    return api.requests.certs.where()
194
+
195
+
196
def is_dcor_url(string):
    """Check whether `string` looks like a DCOR resource URL

    Returns `False` for anything that is not a `str`; otherwise the
    result of matching the stripped string against `REGEXP_DCOR_URL`
    (a match object, or `None` if there is no match).
    """
    if isinstance(string, str):
        return REGEXP_DCOR_URL.match(string.strip())
    return False