dclab 0.62.11__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dclab might be problematic. Click here for more details.
- dclab/__init__.py +23 -0
- dclab/_version.py +16 -0
- dclab/cached.py +97 -0
- dclab/cli/__init__.py +10 -0
- dclab/cli/common.py +237 -0
- dclab/cli/task_compress.py +126 -0
- dclab/cli/task_condense.py +223 -0
- dclab/cli/task_join.py +229 -0
- dclab/cli/task_repack.py +98 -0
- dclab/cli/task_split.py +154 -0
- dclab/cli/task_tdms2rtdc.py +186 -0
- dclab/cli/task_verify_dataset.py +75 -0
- dclab/definitions/__init__.py +79 -0
- dclab/definitions/feat_const.py +202 -0
- dclab/definitions/feat_logic.py +183 -0
- dclab/definitions/meta_const.py +252 -0
- dclab/definitions/meta_logic.py +111 -0
- dclab/definitions/meta_parse.py +94 -0
- dclab/downsampling.cp313-win_amd64.pyd +0 -0
- dclab/downsampling.pyx +230 -0
- dclab/external/__init__.py +4 -0
- dclab/external/packaging/LICENSE +3 -0
- dclab/external/packaging/LICENSE.APACHE +177 -0
- dclab/external/packaging/LICENSE.BSD +23 -0
- dclab/external/packaging/__init__.py +6 -0
- dclab/external/packaging/_structures.py +61 -0
- dclab/external/packaging/version.py +505 -0
- dclab/external/skimage/LICENSE +28 -0
- dclab/external/skimage/__init__.py +2 -0
- dclab/external/skimage/_find_contours.py +216 -0
- dclab/external/skimage/_find_contours_cy.cp313-win_amd64.pyd +0 -0
- dclab/external/skimage/_find_contours_cy.pyx +188 -0
- dclab/external/skimage/_pnpoly.cp313-win_amd64.pyd +0 -0
- dclab/external/skimage/_pnpoly.pyx +99 -0
- dclab/external/skimage/_shared/__init__.py +1 -0
- dclab/external/skimage/_shared/geometry.cp313-win_amd64.pyd +0 -0
- dclab/external/skimage/_shared/geometry.pxd +6 -0
- dclab/external/skimage/_shared/geometry.pyx +55 -0
- dclab/external/skimage/measure.py +7 -0
- dclab/external/skimage/pnpoly.py +53 -0
- dclab/external/statsmodels/LICENSE +35 -0
- dclab/external/statsmodels/__init__.py +6 -0
- dclab/external/statsmodels/nonparametric/__init__.py +1 -0
- dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
- dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
- dclab/external/statsmodels/nonparametric/kernels.py +36 -0
- dclab/features/__init__.py +9 -0
- dclab/features/bright.py +81 -0
- dclab/features/bright_bc.py +93 -0
- dclab/features/bright_perc.py +63 -0
- dclab/features/contour.py +161 -0
- dclab/features/emodulus/__init__.py +339 -0
- dclab/features/emodulus/load.py +252 -0
- dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
- dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
- dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
- dclab/features/emodulus/pxcorr.py +135 -0
- dclab/features/emodulus/scale_linear.py +247 -0
- dclab/features/emodulus/viscosity.py +256 -0
- dclab/features/fl_crosstalk.py +95 -0
- dclab/features/inert_ratio.py +377 -0
- dclab/features/volume.py +242 -0
- dclab/http_utils.py +322 -0
- dclab/isoelastics/__init__.py +468 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
- dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
- dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
- dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
- dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
- dclab/kde_contours.py +222 -0
- dclab/kde_methods.py +303 -0
- dclab/lme4/__init__.py +5 -0
- dclab/lme4/lme4_template.R +94 -0
- dclab/lme4/rsetup.py +204 -0
- dclab/lme4/wrapr.py +386 -0
- dclab/polygon_filter.py +398 -0
- dclab/rtdc_dataset/__init__.py +15 -0
- dclab/rtdc_dataset/check.py +902 -0
- dclab/rtdc_dataset/config.py +533 -0
- dclab/rtdc_dataset/copier.py +353 -0
- dclab/rtdc_dataset/core.py +1001 -0
- dclab/rtdc_dataset/export.py +737 -0
- dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
- dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
- dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
- dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
- dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
- dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
- dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
- dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
- dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
- dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
- dclab/rtdc_dataset/feat_basin.py +550 -0
- dclab/rtdc_dataset/feat_temp.py +102 -0
- dclab/rtdc_dataset/filter.py +263 -0
- dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
- dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
- dclab/rtdc_dataset/fmt_dcor/api.py +111 -0
- dclab/rtdc_dataset/fmt_dcor/base.py +200 -0
- dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
- dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
- dclab/rtdc_dataset/fmt_dcor/tables.py +42 -0
- dclab/rtdc_dataset/fmt_dict.py +103 -0
- dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
- dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
- dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
- dclab/rtdc_dataset/fmt_hdf5/events.py +257 -0
- dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
- dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
- dclab/rtdc_dataset/fmt_hdf5/tables.py +30 -0
- dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
- dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
- dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
- dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
- dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
- dclab/rtdc_dataset/fmt_http.py +102 -0
- dclab/rtdc_dataset/fmt_s3.py +320 -0
- dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
- dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
- dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
- dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
- dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
- dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
- dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
- dclab/rtdc_dataset/load.py +72 -0
- dclab/rtdc_dataset/writer.py +985 -0
- dclab/statistics.py +203 -0
- dclab/util.py +156 -0
- dclab/warn.py +15 -0
- dclab-0.62.11.dist-info/LICENSE +343 -0
- dclab-0.62.11.dist-info/METADATA +146 -0
- dclab-0.62.11.dist-info/RECORD +137 -0
- dclab-0.62.11.dist-info/WHEEL +5 -0
- dclab-0.62.11.dist-info/entry_points.txt +8 -0
- dclab-0.62.11.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
|
|
3
|
+
from ..http_utils import HTTPFile, REQUESTS_AVAILABLE, is_url_available
|
|
4
|
+
from ..http_utils import is_http_url # noqa: F401
|
|
5
|
+
|
|
6
|
+
from .feat_basin import Basin
|
|
7
|
+
from .fmt_hdf5 import RTDC_HDF5
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class RTDC_HTTP(RTDC_HDF5):
    def __init__(self,
                 url: str,
                 *args, **kwargs):
        """Open an RT-DC measurement from an HTTP URL

        Any .rtdc file reachable via plain HTTP(S) can be opened with
        this class, e.g. objects on S3-compatible storage or figshare
        download links.

        Internally, this is a thin wrapper around :class:`.RTDC_HDF5`:
        an :class:`.HTTPFile` instance is handed to h5py as the file
        object.

        Parameters
        ----------
        url: str
            Full URL to an HDF5 file
        *args:
            Arguments for `RTDCBase`
        **kwargs:
            Keyword arguments for `RTDCBase`

        Attributes
        ----------
        path: str
            The URL to the object

        Notes
        -----
        Only the required parts of the file are fetched (random
        access), so the web server must support HTTP range requests.
        """
        if not REQUESTS_AVAILABLE:
            raise ModuleNotFoundError(
                f"Package `requests` required for loading http data '{url}'!")

        self._fhttp = HTTPFile(url)
        if kwargs.get("identifier") is None:
            identifier = self._fhttp.etag
            if identifier is None:
                # No ETag available; fall back to hashing the first
                # cached data chunk.
                identifier = hashlib.md5(
                    self._fhttp.get_cache_chunk(0)).hexdigest()
            # The HTTP ETag (when present) is about as unique as it gets.
            kwargs["identifier"] = identifier

        # Hand the remote file object over to the HDF5 reader.
        super().__init__(
            h5path=self._fhttp,
            *args,
            **kwargs)
        # Expose the actual HTTP URL instead of the file object.
        self.path = url

    def close(self):
        """Close the dataset and the underlying remote file object"""
        super().close()
        self._fhttp.close()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class HTTPBasin(Basin):
    """Basin that provides access to remote data via HTTP"""
    basin_format = "http"
    basin_type = "remote"

    def __init__(self, *args, **kwargs):
        # Availability result cache used by `is_available`
        self._available_verified = None
        super().__init__(*args, **kwargs)

    def _load_dataset(self, location, **kwargs):
        return RTDC_HTTP(location, **kwargs)

    def is_available(self):
        """Check for `requests` and object availability

        Caching policy: Once this method returns True, it will always
        return True.
        """
        if self._available_verified is None:
            with self._av_check_lock:
                if REQUESTS_AVAILABLE:
                    avail, reason = is_url_available(self.location,
                                                     ret_reason=True)
                    if reason in ["forbidden", "not found"]:
                        # We cannot access the URL in the near future.
                        self._available_verified = False
                    elif avail:
                        self._available_verified = True
                    # Otherwise leave the cache at None so that a later
                    # call re-checks availability.
                else:
                    # Without `requests` there is nothing we can do.
                    self._available_verified = False
        return self._available_verified
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
# import multiprocessing BaseManager here, because there is some kind
|
|
3
|
+
# of circular dependency issue with s3transfer.compat and multiprocessing.
|
|
4
|
+
from multiprocessing.managers import BaseManager # noqa: F401
|
|
5
|
+
import os
|
|
6
|
+
import pathlib
|
|
7
|
+
import re
|
|
8
|
+
import socket
|
|
9
|
+
from urllib.parse import urlparse
|
|
10
|
+
import warnings
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
import boto3
|
|
15
|
+
import botocore
|
|
16
|
+
import botocore.client
|
|
17
|
+
import botocore.exceptions
|
|
18
|
+
import botocore.session
|
|
19
|
+
except ModuleNotFoundError:
|
|
20
|
+
BOTO3_AVAILABLE = False
|
|
21
|
+
else:
|
|
22
|
+
BOTO3_AVAILABLE = True
|
|
23
|
+
|
|
24
|
+
from ..http_utils import HTTPFile
|
|
25
|
+
|
|
26
|
+
from .feat_basin import Basin
|
|
27
|
+
|
|
28
|
+
from .fmt_hdf5 import RTDC_HDF5
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
#: Regular expression for matching a full S3 object URL of the form
#: ``scheme://host[:port]/bucket/key``
REGEXP_S3_URL = re.compile(
    r"^(https?:\/\/)"  # protocol (http or https; required here — the
                       # scheme-less `bucket/key` form is handled by
                       # REGEXP_S3_BUCKET_KEY in `is_s3_url`)
    r"([a-z0-9-\.]*)(\:[0-9]*)?\/"  # host:port
    r".+\/"  # bucket
    r".+"  # key
)
#: Regular expression for matching a scheme-less ``bucket/key`` object path
REGEXP_S3_BUCKET_KEY = re.compile(r"^[0-9a-z-]+(\/[0-9a-z-]+)+$")

# Fallback S3 configuration taken from the environment; used when the
# caller does not pass the endpoint or credentials explicitly.
S3_ENDPOINT_URL = os.environ.get("DCLAB_S3_ENDPOINT_URL")
S3_ACCESS_KEY_ID = os.environ.get("DCLAB_S3_ACCESS_KEY_ID")
S3_SECRET_ACCESS_KEY = os.environ.get("DCLAB_S3_SECRET_ACCESS_KEY")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class S3File(HTTPFile):
    """Monkeypatched `HTTPFile` to support authenticated access to S3"""
    def __init__(self,
                 object_path: str,
                 endpoint_url: str,
                 access_key_id: str = "",
                 secret_access_key: str = "",
                 use_ssl: bool = True,
                 verify_ssl: bool = True):
        """
        Parameters
        ----------
        object_path: str
            bucket/key path to object in the object store
        endpoint_url: str
            the explicit endpoint URL for accessing the object store
        access_key_id:
            S3 access key
        secret_access_key:
            secret S3 key matching `access_key_id`
        use_ssl: bool
            use SSL to connect to the endpoint, only disabled for testing
        verify_ssl: bool
            make sure the SSL certificate is sound, only used for testing

        Raises
        ------
        ValueError
            if `endpoint_url` is None (i.e. neither the URL nor the
            environment provided an endpoint)
        """
        if endpoint_url is None:
            raise ValueError(
                "The S3 endpoint URL is empty. This could mean that you did "
                "not specify the full S3 URL or that you forgot to set "
                "the `S3_ENDPOINT_URL` environment variable.")
        # Normalize: no surrounding whitespace, no trailing slash.
        endpoint_url = endpoint_url.strip().rstrip("/")
        self.botocore_session = botocore.session.get_session()
        self.s3_session = boto3.Session(
            aws_access_key_id=access_key_id,
            aws_secret_access_key=secret_access_key,
            botocore_session=self.botocore_session)
        # NOTE(review): this client is created without the unsigned
        # `config` used for the resource below; it is currently only
        # used in `close` — confirm whether it needs the same config.
        self.s3_client = self.s3_session.client(
            service_name='s3',
            use_ssl=use_ssl,
            verify=verify_ssl,
            endpoint_url=endpoint_url,
            )
        # Use a configuration that allows anonymous access
        # https://stackoverflow.com/a/34866092
        if not secret_access_key:
            config = botocore.client.Config(
                signature_version=botocore.UNSIGNED,
                region_name='us-east-1')
        else:
            config = None

        self.s3_resource = self.s3_session.resource(
            service_name="s3",
            use_ssl=use_ssl,
            verify=verify_ssl,
            endpoint_url=endpoint_url,
            config=config)

        # Split `bucket/key` at the first slash only: keys may contain
        # additional slashes.
        bucket_name, object_name = object_path.strip("/").split("/", 1)
        self.s3_object = self.s3_resource.Object(
            bucket_name=bucket_name,
            key=object_name)

        super(S3File, self).__init__(f"{endpoint_url}/{object_path}")

    def _parse_header(self):
        # Populate length and ETag from the S3 object metadata instead
        # of performing an HTTP HEAD request (as `HTTPFile` would).
        if self._len is None:
            self._len = self.s3_object.content_length
            self._etag = self.s3_object.e_tag

    def close(self):
        """Close the file and release the boto3 client"""
        super(S3File, self).close()
        self.s3_client.close()

    def download_range(self, start, stop):
        """Download bytes given by the range (`start`, `stop`)

        `stop` is not inclusive (In the HTTP range request it normally is).
        """
        stream = self.s3_object.get(Range=f"bytes={start}-{stop-1}")['Body']
        return stream.read()
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class RTDC_S3(RTDC_HDF5):
    def __init__(self,
                 url: str,
                 endpoint_url: str = None,
                 access_key_id: str = None,
                 secret_access_key: str = None,
                 use_ssl: bool = True,
                 *args, **kwargs):
        """Open an RT-DC measurement stored in an S3-compatible object store

        A thin wrapper around :class:`.RTDC_HDF5`: the object is
        accessed with :mod:`boto3` through an :class:`.HTTPFile`-like
        file object that is handed to h5py.

        Parameters
        ----------
        url: str
            URL to an object in an S3 instance; this can be either a full
            URL (including the endpoint), or just `bucket/key`
        access_key_id: str
            S3 access identifier
        secret_access_key: str
            Secret S3 access key
        use_ssl: bool
            Whether to enforce SSL (defaults to True)
        *args:
            Arguments for `RTDCBase`
        **kwargs:
            Keyword arguments for `RTDCBase`

        Attributes
        ----------
        path: str
            The URL to the object
        """
        if not BOTO3_AVAILABLE:
            raise ModuleNotFoundError(
                f"Package `boto3` required for loading S3 data '{url}'!")

        # Resolve endpoint and credentials: explicit arguments win,
        # then the URL itself, then the environment variables.
        resolved_endpoint = (endpoint_url
                             or get_endpoint_url(url)
                             or S3_ENDPOINT_URL)
        resolved_key_id = access_key_id or S3_ACCESS_KEY_ID or ""
        resolved_secret = secret_access_key or S3_SECRET_ACCESS_KEY or ""
        self._s3file = S3File(
            object_path=get_object_path(url),
            endpoint_url=resolved_endpoint,
            access_key_id=resolved_key_id,
            secret_access_key=resolved_secret,
            use_ssl=use_ssl,
            verify_ssl=use_ssl,
        )
        # Hand the remote file object over to the HDF5 reader.
        super().__init__(
            h5path=self._s3file,
            *args,
            **kwargs)
        # Expose the actual S3 URL instead of the file object.
        self.path = self._s3file.url

    def close(self):
        """Close the dataset and the underlying S3 file object"""
        super().close()
        self._s3file.close()
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class S3Basin(Basin):
    """Basin that provides access to data on an S3 object store"""
    basin_format = "s3"
    basin_type = "remote"

    def __init__(self, *args, **kwargs):
        # Availability result cache used by `is_available`
        self._available_verified = None
        super().__init__(*args, **kwargs)

    def _load_dataset(self, location, **kwargs):
        return RTDC_S3(location, **kwargs)

    def is_available(self):
        """Check for boto3 and object availability

        Caching policy: Once this method returns True, it will always
        return True.
        """
        if self._available_verified is None:
            with self._av_check_lock:
                # Without boto3 there is no point in checking the object.
                self._available_verified = (
                    BOTO3_AVAILABLE
                    and is_s3_object_available(self.location))
        return self._available_verified
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def is_s3_object_available(url: str,
                           access_key_id: str = None,
                           secret_access_key: str = None,
                           ):
    """Check whether an S3 object is available

    The check proceeds in three stages: the URL must look like an S3
    URL, the endpoint host must accept a TCP connection (1 s timeout),
    and finally the object metadata must be retrievable via boto3.

    Parameters
    ----------
    url: str
        full URL to the object
    access_key_id: str
        S3 access identifier
    secret_access_key: str
        Secret S3 access key

    Returns
    -------
    bool
        True if the object could be accessed, False otherwise
    """
    avail = False
    if is_s3_url(url):
        endpoint_url = get_endpoint_url(url) or S3_ENDPOINT_URL
        if not endpoint_url:
            warnings.warn(
                f"Could not determine endpoint from URL '{url}'. Please "
                f"set the `S3_ENDPOINT_URL` environment variable or pass "
                f"a full object URL.")
        else:
            # default to https if no scheme or port is specified
            urlp = urlparse(endpoint_url)
            port = urlp.port or (80 if urlp.scheme == "http" else 443)
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.settimeout(1)
                # Try to connect to the host
                try:
                    # Use `hostname`, not `netloc`, because `netloc` contains
                    # the port number which we do not want here.
                    s.connect((urlp.hostname, port))
                except (socket.gaierror, OSError):
                    pass
                else:
                    # Try to access the object
                    s3file = S3File(
                        object_path=get_object_path(url),
                        endpoint_url=endpoint_url,
                        access_key_id=(access_key_id
                                       or S3_ACCESS_KEY_ID
                                       or ""),
                        secret_access_key=(secret_access_key
                                           or S3_SECRET_ACCESS_KEY
                                           or ""),
                        )
                    try:
                        s3file.s3_object.load()
                    except botocore.exceptions.ClientError:
                        avail = False
                    else:
                        avail = True
                    finally:
                        # BUGFIX: the original left the boto3 client and
                        # session open; release them here.
                        s3file.close()
    return avail
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
@functools.lru_cache()
def get_endpoint_url(url):
    """Given a URL of an S3 object, return the endpoint URL

    Return None if no endpoint URL can be extracted (e.g. because
    just `bucket_name/object_path` was passed).
    """
    parsed = urlparse(url=url)
    if not parsed.hostname:
        # No host present, e.g. a bare `bucket/key` path.
        return None
    # Default to https and fill in the scheme's standard port.
    scheme = parsed.scheme or "https"
    default_port = 80 if scheme == "http" else 443
    return f"{scheme}://{parsed.hostname}:{parsed.port or default_port}"
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
@functools.lru_cache()
def get_object_path(url):
    """Given a URL of an S3 object, return the `bucket_name/object_path` part

    Return object paths always without leading slash `/`.
    """
    path = urlparse(url=url).path
    return path.lstrip("/")
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
@functools.lru_cache()
def is_s3_url(string):
    """Check whether `string` is a valid S3 URL using regexp"""
    if not isinstance(string, str):
        return False
    candidate = string.strip()
    if REGEXP_S3_URL.match(candidate):
        # A full URL including the endpoint: definitely S3.
        return True
    if pathlib.Path(string).exists():
        # This is actually a local file.
        return False
    # Finally, accept the plain `bucket_name/key` form.
    return bool(REGEXP_S3_BUCKET_KEY.match(candidate))
|