dclab 0.62.11__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic. Click here for more details.

Files changed (137) hide show
  1. dclab/__init__.py +23 -0
  2. dclab/_version.py +16 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +183 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cp313-win_amd64.pyd +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cp313-win_amd64.pyd +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cp313-win_amd64.pyd +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cp313-win_amd64.pyd +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +256 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde_contours.py +222 -0
  73. dclab/kde_methods.py +303 -0
  74. dclab/lme4/__init__.py +5 -0
  75. dclab/lme4/lme4_template.R +94 -0
  76. dclab/lme4/rsetup.py +204 -0
  77. dclab/lme4/wrapr.py +386 -0
  78. dclab/polygon_filter.py +398 -0
  79. dclab/rtdc_dataset/__init__.py +15 -0
  80. dclab/rtdc_dataset/check.py +902 -0
  81. dclab/rtdc_dataset/config.py +533 -0
  82. dclab/rtdc_dataset/copier.py +353 -0
  83. dclab/rtdc_dataset/core.py +1001 -0
  84. dclab/rtdc_dataset/export.py +737 -0
  85. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  86. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  87. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  88. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  89. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  91. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  92. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  93. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  94. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  95. dclab/rtdc_dataset/feat_basin.py +550 -0
  96. dclab/rtdc_dataset/feat_temp.py +102 -0
  97. dclab/rtdc_dataset/filter.py +263 -0
  98. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  99. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  100. dclab/rtdc_dataset/fmt_dcor/api.py +111 -0
  101. dclab/rtdc_dataset/fmt_dcor/base.py +200 -0
  102. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  103. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  104. dclab/rtdc_dataset/fmt_dcor/tables.py +42 -0
  105. dclab/rtdc_dataset/fmt_dict.py +103 -0
  106. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  107. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  108. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  109. dclab/rtdc_dataset/fmt_hdf5/events.py +257 -0
  110. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  111. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  112. dclab/rtdc_dataset/fmt_hdf5/tables.py +30 -0
  113. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  114. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  115. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  116. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  118. dclab/rtdc_dataset/fmt_http.py +102 -0
  119. dclab/rtdc_dataset/fmt_s3.py +320 -0
  120. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  121. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  122. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  123. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  124. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  125. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  126. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  127. dclab/rtdc_dataset/load.py +72 -0
  128. dclab/rtdc_dataset/writer.py +985 -0
  129. dclab/statistics.py +203 -0
  130. dclab/util.py +156 -0
  131. dclab/warn.py +15 -0
  132. dclab-0.62.11.dist-info/LICENSE +343 -0
  133. dclab-0.62.11.dist-info/METADATA +146 -0
  134. dclab-0.62.11.dist-info/RECORD +137 -0
  135. dclab-0.62.11.dist-info/WHEEL +5 -0
  136. dclab-0.62.11.dist-info/entry_points.txt +8 -0
  137. dclab-0.62.11.dist-info/top_level.txt +1 -0
@@ -0,0 +1,102 @@
1
+ import hashlib
2
+
3
+ from ..http_utils import HTTPFile, REQUESTS_AVAILABLE, is_url_available
4
+ from ..http_utils import is_http_url # noqa: F401
5
+
6
+ from .feat_basin import Basin
7
+ from .fmt_hdf5 import RTDC_HDF5
8
+
9
+
10
class RTDC_HTTP(RTDC_HDF5):
    def __init__(self,
                 url: str,
                 *args, **kwargs):
        """Access RT-DC measurements via HTTP

        This class allows you to open .rtdc files accessible via an
        HTTP URL, for instance files on an S3 object storage or
        figshare download links.

        This is essentially just a wrapper around :class:`.RTDC_HDF5`
        with :class:`.HTTPFile` passing a file object to h5py.

        Parameters
        ----------
        url: str
            Full URL to an HDF5 file
        *args:
            Arguments for `RTDCBase`
        **kwargs:
            Keyword arguments for `RTDCBase`; if no `identifier` is
            given (or it is None), one is derived from the remote file
            (see below)

        Attributes
        ----------
        path: str
            The URL to the object

        Raises
        ------
        ModuleNotFoundError
            If the `requests` package is not available

        Notes
        -----
        Since this format still requires random access to the file online,
        i.e. not the entire file is downloaded, only parts of it, the
        web server must support range requests.

        If anything goes wrong while determining the identifier or while
        initializing the HDF5 dataset, the underlying :class:`.HTTPFile`
        is closed before the exception is re-raised.
        """
        if not REQUESTS_AVAILABLE:
            raise ModuleNotFoundError(
                f"Package `requests` required for loading http data '{url}'!")

        self._fhttp = HTTPFile(url)
        try:
            if kwargs.get("identifier") is None:
                if self._fhttp.etag is not None:
                    # Set the HTTP ETag as the identifier, it doesn't get
                    # more unique than that!
                    kwargs["identifier"] = self._fhttp.etag
                else:
                    # Compute a hash of the first data chunk (the digest
                    # only serves as an identifier, not as a security
                    # measure)
                    kwargs["identifier"] = hashlib.md5(
                        self._fhttp.get_cache_chunk(0)).hexdigest()

            # Initialize the HDF5 dataset
            super(RTDC_HTTP, self).__init__(
                h5path=self._fhttp,
                *args,
                **kwargs)
        except BaseException:
            # Do not leave the remote file handle dangling when the
            # dataset could not be opened.
            self._fhttp.close()
            raise
        # Override self.path with the actual HTTP URL
        self.path = url

    def close(self):
        """Close the HDF5 dataset and then the underlying HTTP file

        The HTTP file is closed even if closing the dataset raises.
        """
        try:
            super(RTDC_HTTP, self).close()
        finally:
            self._fhttp.close()
69
+
70
+
71
class HTTPBasin(Basin):
    """Remote basin whose dataset is opened with :class:`RTDC_HTTP`"""
    basin_format = "http"
    basin_type = "remote"

    def __init__(self, *args, **kwargs):
        # Tri-state availability cache: None means "not determined yet",
        # True/False are final verdicts (see `is_available`).
        self._available_verified = None
        super().__init__(*args, **kwargs)

    def _load_dataset(self, location, **kwargs):
        """Open `location` as an :class:`RTDC_HTTP` dataset"""
        return RTDC_HTTP(location, **kwargs)

    def is_available(self):
        """Check for `requests` and object availability

        Caching policy: Once this method returns True, it will always
        return True. A False verdict is cached as well; it is set when
        `requests` is missing or when the URL check reports "forbidden"
        or "not found". In all other unsuccessful cases nothing is
        cached, None is returned, and the next call checks again.
        """
        if self._available_verified is not None:
            # A final verdict has been reached earlier.
            return self._available_verified

        # `_av_check_lock` is not defined in this class; presumably it is
        # provided by `Basin` (verify against the base class).
        with self._av_check_lock:
            if REQUESTS_AVAILABLE:
                avail, reason = is_url_available(self.location,
                                                 ret_reason=True)
                if reason in ("forbidden", "not found"):
                    # we cannot access the URL in the near future
                    self._available_verified = False
                elif avail:
                    self._available_verified = True
            else:
                # don't even bother
                self._available_verified = False
        return self._available_verified
@@ -0,0 +1,320 @@
1
+ import functools
2
+ # import multiprocessing BaseManager here, because there is some kind
3
+ # of circular dependency issue with s3transfer.compat and multiprocessing.
4
+ from multiprocessing.managers import BaseManager # noqa: F401
5
+ import os
6
+ import pathlib
7
+ import re
8
+ import socket
9
+ from urllib.parse import urlparse
10
+ import warnings
11
+
12
+
13
+ try:
14
+ import boto3
15
+ import botocore
16
+ import botocore.client
17
+ import botocore.exceptions
18
+ import botocore.session
19
+ except ModuleNotFoundError:
20
+ BOTO3_AVAILABLE = False
21
+ else:
22
+ BOTO3_AVAILABLE = True
23
+
24
+ from ..http_utils import HTTPFile
25
+
26
+ from .feat_basin import Basin
27
+
28
+ from .fmt_hdf5 import RTDC_HDF5
29
+
30
+
31
+ #: Regular expression for matching a DCOR resource URL
32
+ REGEXP_S3_URL = re.compile(
33
+ r"^(https?:\/\/)" # protocol (http or https or omitted)
34
+ r"([a-z0-9-\.]*)(\:[0-9]*)?\/" # host:port
35
+ r".+\/" # bucket
36
+ r".+" # key
37
+ )
38
+ REGEXP_S3_BUCKET_KEY = re.compile(r"^[0-9a-z-]+(\/[0-9a-z-]+)+$")
39
+
40
+ S3_ENDPOINT_URL = os.environ.get("DCLAB_S3_ENDPOINT_URL")
41
+ S3_ACCESS_KEY_ID = os.environ.get("DCLAB_S3_ACCESS_KEY_ID")
42
+ S3_SECRET_ACCESS_KEY = os.environ.get("DCLAB_S3_SECRET_ACCESS_KEY")
43
+
44
+
45
+ class S3File(HTTPFile):
46
+ """Monkeypatched `HTTPFile` to support authenticated access to S3"""
47
+ def __init__(self,
48
+ object_path: str,
49
+ endpoint_url: str,
50
+ access_key_id: str = "",
51
+ secret_access_key: str = "",
52
+ use_ssl: bool = True,
53
+ verify_ssl: bool = True):
54
+ """
55
+
56
+ Parameters
57
+ ----------
58
+ object_path: str
59
+ bucket/key path to object in the object store
60
+ endpoint_url: str
61
+ the explicit endpoint URL for accessing the object store
62
+ access_key_id:
63
+ S3 access key
64
+ secret_access_key:
65
+ secret S3 key mathcing `access_key_id`
66
+ use_ssl: bool
67
+ use SSL to connect to the endpoint, only disabled for testing
68
+ verify_ssl: bool
69
+ make sure the SSL certificate is sound, only used for testing
70
+ """
71
+ if endpoint_url is None:
72
+ raise ValueError(
73
+ "The S3 endpoint URL is empty. This could mean that you did "
74
+ "not specify the full S3 URL or that you forgot to set "
75
+ "the `S3_ENDPOINT_URL` environment variable.")
76
+ endpoint_url = endpoint_url.strip().rstrip("/")
77
+ self.botocore_session = botocore.session.get_session()
78
+ self.s3_session = boto3.Session(
79
+ aws_access_key_id=access_key_id,
80
+ aws_secret_access_key=secret_access_key,
81
+ botocore_session=self.botocore_session)
82
+ self.s3_client = self.s3_session.client(
83
+ service_name='s3',
84
+ use_ssl=use_ssl,
85
+ verify=verify_ssl,
86
+ endpoint_url=endpoint_url,
87
+ )
88
+ # Use a configuration that allows anonymous access
89
+ # https://stackoverflow.com/a/34866092
90
+ if not secret_access_key:
91
+ config = botocore.client.Config(
92
+ signature_version=botocore.UNSIGNED,
93
+ region_name='us-east-1')
94
+ else:
95
+ config = None
96
+
97
+ self.s3_resource = self.s3_session.resource(
98
+ service_name="s3",
99
+ use_ssl=use_ssl,
100
+ verify=verify_ssl,
101
+ endpoint_url=endpoint_url,
102
+ config=config)
103
+
104
+ bucket_name, object_name = object_path.strip("/").split("/", 1)
105
+ self.s3_object = self.s3_resource.Object(
106
+ bucket_name=bucket_name,
107
+ key=object_name)
108
+
109
+ super(S3File, self).__init__(f"{endpoint_url}/{object_path}")
110
+
111
+ def _parse_header(self):
112
+ if self._len is None:
113
+ self._len = self.s3_object.content_length
114
+ self._etag = self.s3_object.e_tag
115
+
116
+ def close(self):
117
+ super(S3File, self).close()
118
+ self.s3_client.close()
119
+
120
+ def download_range(self, start, stop):
121
+ """Download bytes given by the range (`start`, `stop`)
122
+
123
+ `stop` is not inclusive (In the HTTP range request it normally is).
124
+ """
125
+ stream = self.s3_object.get(Range=f"bytes={start}-{stop-1}")['Body']
126
+ return stream.read()
127
+
128
+
129
+ class RTDC_S3(RTDC_HDF5):
130
+ def __init__(self,
131
+ url: str,
132
+ endpoint_url: str = None,
133
+ access_key_id: str = None,
134
+ secret_access_key: str = None,
135
+ use_ssl: bool = True,
136
+ *args, **kwargs):
137
+ """Access RT-DC measurements in an S3-compatible object store
138
+
139
+ This is essentially just a wrapper around :class:`.RTDC_HDF5`
140
+ with :mod:`boto3` and :class:`.HTTPFile` passing a file object to h5py.
141
+
142
+ Parameters
143
+ ----------
144
+ url: str
145
+ URL to an object in an S3 instance; this can be either a full
146
+ URL (including the endpoint), or just `bucket/key`
147
+ access_key_id: str
148
+ S3 access identifier
149
+ secret_access_key: str
150
+ Secret S3 access key
151
+ use_ssl: bool
152
+ Whether to enforce SSL (defaults to True)
153
+ *args:
154
+ Arguments for `RTDCBase`
155
+ **kwargs:
156
+ Keyword arguments for `RTDCBase`
157
+
158
+ Attributes
159
+ ----------
160
+ path: str
161
+ The URL to the object
162
+ """
163
+ if not BOTO3_AVAILABLE:
164
+ raise ModuleNotFoundError(
165
+ f"Package `boto3` required for loading S3 data '{url}'!")
166
+
167
+ self._s3file = S3File(
168
+ object_path=get_object_path(url),
169
+ endpoint_url=(endpoint_url
170
+ or get_endpoint_url(url)
171
+ or S3_ENDPOINT_URL),
172
+ access_key_id=(access_key_id
173
+ or S3_ACCESS_KEY_ID
174
+ or ""),
175
+ secret_access_key=(secret_access_key
176
+ or S3_SECRET_ACCESS_KEY
177
+ or ""),
178
+ use_ssl=use_ssl,
179
+ verify_ssl=use_ssl,
180
+ )
181
+ # Initialize the HDF5 dataset
182
+ super(RTDC_S3, self).__init__(
183
+ h5path=self._s3file,
184
+ *args,
185
+ **kwargs)
186
+ # Override self.path with the actual S3 URL
187
+ self.path = self._s3file.url
188
+
189
+ def close(self):
190
+ super(RTDC_S3, self).close()
191
+ self._s3file.close()
192
+
193
+
194
+ class S3Basin(Basin):
195
+ basin_format = "s3"
196
+ basin_type = "remote"
197
+
198
+ def __init__(self, *args, **kwargs):
199
+ self._available_verified = None
200
+ super(S3Basin, self).__init__(*args, **kwargs)
201
+
202
+ def _load_dataset(self, location, **kwargs):
203
+ h5file = RTDC_S3(location, **kwargs)
204
+ return h5file
205
+
206
+ def is_available(self):
207
+ """Check for boto3 and object availability
208
+
209
+ Caching policy: Once this method returns True, it will always
210
+ return True.
211
+ """
212
+ if self._available_verified is None:
213
+ with self._av_check_lock:
214
+ if not BOTO3_AVAILABLE:
215
+ self._available_verified = False
216
+ else:
217
+ self._available_verified = \
218
+ is_s3_object_available(self.location)
219
+ return self._available_verified
220
+
221
+
222
+ def is_s3_object_available(url: str,
223
+ access_key_id: str = None,
224
+ secret_access_key: str = None,
225
+ ):
226
+ """Check whether an S3 object is available
227
+
228
+ Parameters
229
+ ----------
230
+ url: str
231
+ full URL to the object
232
+ access_key_id: str
233
+ S3 access identifier
234
+ secret_access_key: str
235
+ Secret S3 access key
236
+ """
237
+ avail = False
238
+ if is_s3_url(url):
239
+ endpoint_url = get_endpoint_url(url) or S3_ENDPOINT_URL
240
+ if not endpoint_url:
241
+ warnings.warn(
242
+ f"Could not determine endpoint from URL '{url}'. Please "
243
+ f"set the `S3_ENDPOINT_URL` environment variable or pass "
244
+ f"a full object URL.")
245
+ else:
246
+ # default to https if no scheme or port is specified
247
+ urlp = urlparse(endpoint_url)
248
+ port = urlp.port or (80 if urlp.scheme == "http" else 443)
249
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
250
+ s.settimeout(1)
251
+ # Try to connect to the host
252
+ try:
253
+ # Use `hostname`, not `netloc`, because `netloc` contains
254
+ # the port number which we do not want here.
255
+ s.connect((urlp.hostname, port))
256
+ except (socket.gaierror, OSError):
257
+ pass
258
+ else:
259
+ # Try to access the object
260
+ s3file = S3File(
261
+ object_path=get_object_path(url),
262
+ endpoint_url=endpoint_url,
263
+ access_key_id=(access_key_id
264
+ or S3_ACCESS_KEY_ID
265
+ or ""),
266
+ secret_access_key=(secret_access_key
267
+ or S3_SECRET_ACCESS_KEY
268
+ or ""),
269
+ )
270
+ try:
271
+ s3file.s3_object.load()
272
+ except botocore.exceptions.ClientError:
273
+ avail = False
274
+ else:
275
+ avail = True
276
+ return avail
277
+
278
+
279
+ @functools.lru_cache()
280
+ def get_endpoint_url(url):
281
+ """Given a URL of an S3 object, return the endpoint URL
282
+
283
+ Return None if no endpoint URL can be extracted (e.g. because
284
+ just `bucket_name/object_path` was passed).
285
+ """
286
+ urlp = urlparse(url=url)
287
+ if urlp.hostname:
288
+ scheme = urlp.scheme or "https"
289
+ port = urlp.port or (80 if scheme == "http" else 443)
290
+ return f"{scheme}://{urlp.hostname}:{port}"
291
+ else:
292
+ return None
293
+
294
+
295
+ @functools.lru_cache()
296
+ def get_object_path(url):
297
+ """Given a URL of an S3 object, return the `bucket_name/object_path` part
298
+
299
+ Return object paths always without leading slash `/`.
300
+ """
301
+ urlp = urlparse(url=url)
302
+ return urlp.path.lstrip("/")
303
+
304
+
305
+ @functools.lru_cache()
306
+ def is_s3_url(string):
307
+ """Check whether `string` is a valid S3 URL using regexp"""
308
+ if not isinstance(string, str):
309
+ return False
310
+ elif REGEXP_S3_URL.match(string.strip()):
311
+ # this is pretty clear
312
+ return True
313
+ elif pathlib.Path(string).exists():
314
+ # this is actually a file
315
+ return False
316
+ elif REGEXP_S3_BUCKET_KEY.match(string.strip()):
317
+ # bucket_name/key
318
+ return True
319
+ else:
320
+ return False