oasis-data-manager 0.1.6__py2.py3-none-any.whl → 0.2.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oasis_data_manager/__init__.py +1 -1
- oasis_data_manager/filestore/backends/azure_abfs.py +15 -18
- oasis_data_manager/filestore/backends/base.py +83 -30
- {oasis_data_manager-0.1.6.dist-info → oasis_data_manager-0.2.0.dist-info}/METADATA +13 -9
- {oasis_data_manager-0.1.6.dist-info → oasis_data_manager-0.2.0.dist-info}/RECORD +7 -7
- {oasis_data_manager-0.1.6.dist-info → oasis_data_manager-0.2.0.dist-info}/WHEEL +1 -1
- {oasis_data_manager-0.1.6.dist-info → oasis_data_manager-0.2.0.dist-info}/top_level.txt +0 -0
oasis_data_manager/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = '0.1.6'
|
|
1
|
+
__version__ = '0.2.0'
|
|
@@ -114,24 +114,21 @@ class AzureABFSStorage(BaseStorage):
|
|
|
114
114
|
def connection_string(self):
|
|
115
115
|
if self._connection_string:
|
|
116
116
|
return self._connection_string
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
if self.
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
cs += f"AccountKey={fs.account_key};"
|
|
133
|
-
|
|
134
|
-
return cs
|
|
117
|
+
|
|
118
|
+
cs_parts = [
|
|
119
|
+
f"DefaultEndpointsProtocol={'https' if self.azure_ssl else 'http'}",
|
|
120
|
+
f"AccountName={self.account_name}",
|
|
121
|
+
f"AccountKey={self.account_key}"
|
|
122
|
+
]
|
|
123
|
+
|
|
124
|
+
if self.endpoint_url:
|
|
125
|
+
# Azurite requires the account name in the endpoint path
|
|
126
|
+
endpoint = self.endpoint_url.rstrip('/')
|
|
127
|
+
if self.account_name not in endpoint:
|
|
128
|
+
endpoint = f"{endpoint}/{self.account_name}"
|
|
129
|
+
cs_parts.append(f"BlobEndpoint={endpoint};")
|
|
130
|
+
|
|
131
|
+
return ";".join(cs_parts)
|
|
135
132
|
|
|
136
133
|
def get_storage_url(self, filename=None, suffix="tar.gz", encode_params=True):
|
|
137
134
|
filename = (
|
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
import base64
|
|
2
1
|
import contextlib
|
|
3
|
-
import io
|
|
4
2
|
import logging
|
|
5
3
|
import os
|
|
6
4
|
import shutil
|
|
@@ -16,6 +14,7 @@ import fsspec
|
|
|
16
14
|
from fsspec.implementations.dirfs import DirFileSystem
|
|
17
15
|
|
|
18
16
|
from oasis_data_manager.errors import OasisException
|
|
17
|
+
import xxhash
|
|
19
18
|
|
|
20
19
|
LOG_FILE_SUFFIX = "txt"
|
|
21
20
|
ARCHIVE_FILE_SUFFIX = "tar.gz"
|
|
@@ -191,39 +190,93 @@ class BaseStorage(object):
|
|
|
191
190
|
:return: Absolute filepath to stored Object
|
|
192
191
|
:rtype str
|
|
193
192
|
"""
|
|
194
|
-
# null ref given
|
|
195
193
|
if not reference:
|
|
196
194
|
if required:
|
|
197
195
|
raise MissingInputsException(reference)
|
|
198
|
-
|
|
199
|
-
return None
|
|
196
|
+
return None
|
|
200
197
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
if self.cache_root:
|
|
210
|
-
if os.path.exists(cached_file):
|
|
211
|
-
logging.info("Get from Cache: {}".format(reference))
|
|
212
|
-
return cached_file
|
|
213
|
-
|
|
214
|
-
if self._is_valid_url(reference):
|
|
215
|
-
# if the file is not in the path, and is a url download it to the cache
|
|
216
|
-
response = urlopen(reference)
|
|
217
|
-
fdata = response.read()
|
|
218
|
-
|
|
219
|
-
with io.open(cached_file, "w+b") as f:
|
|
220
|
-
f.write(fdata)
|
|
221
|
-
logging.info("Get from URL: {}".format(reference))
|
|
198
|
+
fs_protocol = getattr(self.fs.fs, "protocol", None)
|
|
199
|
+
|
|
200
|
+
# Normalize protocol into a tuple of strings
|
|
201
|
+
if isinstance(fs_protocol, str):
|
|
202
|
+
protocols = (fs_protocol,)
|
|
203
|
+
elif isinstance(fs_protocol, (list, tuple)):
|
|
204
|
+
protocols = tuple(fs_protocol)
|
|
222
205
|
else:
|
|
223
|
-
|
|
224
|
-
|
|
206
|
+
protocols = ()
|
|
207
|
+
|
|
208
|
+
enable_etag_cache = (
|
|
209
|
+
self.cache_root
|
|
210
|
+
and not self._is_valid_url(reference)
|
|
211
|
+
and any(p in ("s3", "s3a", "az", "abfs", "abfss") for p in protocols)
|
|
212
|
+
)
|
|
225
213
|
|
|
226
|
-
return
|
|
214
|
+
# No cache root configured, just return data
|
|
215
|
+
if not enable_etag_cache:
|
|
216
|
+
if not no_cache_target:
|
|
217
|
+
raise OasisException("Error: caching disabled for this filesystem and no_cache_target not provided")
|
|
218
|
+
Path(no_cache_target).parent.mkdir(parents=True, exist_ok=True)
|
|
219
|
+
if self._is_valid_url(reference):
|
|
220
|
+
with urlopen(reference, timeout=30) as r:
|
|
221
|
+
data = r.read()
|
|
222
|
+
with open(no_cache_target, "wb") as f:
|
|
223
|
+
f.write(data)
|
|
224
|
+
logging.info("Get from URL: {}".format(reference))
|
|
225
|
+
else:
|
|
226
|
+
self.fs.get(reference, no_cache_target, recursive=True)
|
|
227
|
+
logging.info("Get from Filestore: {}".format(reference))
|
|
228
|
+
return no_cache_target
|
|
229
|
+
|
|
230
|
+
# Caching enabled
|
|
231
|
+
# Get metadata
|
|
232
|
+
try:
|
|
233
|
+
info = self.fs.info(reference)
|
|
234
|
+
except FileNotFoundError:
|
|
235
|
+
if required:
|
|
236
|
+
raise MissingInputsException(reference)
|
|
237
|
+
return None
|
|
238
|
+
|
|
239
|
+
# Raise error if type is not file
|
|
240
|
+
if info.get("type") == "directory":
|
|
241
|
+
raise OasisException(f"Directories are not supported in get_from_cache: {reference}")
|
|
242
|
+
|
|
243
|
+
remote_etag = info.get("ETag") or info.get("etag")
|
|
244
|
+
if remote_etag is None:
|
|
245
|
+
self.logger.warning(f"ETag missing for {reference} — skipping cache and returning fresh download")
|
|
246
|
+
if no_cache_target is not None:
|
|
247
|
+
dest_path = no_cache_target
|
|
248
|
+
Path(dest_path).parent.mkdir(parents=True, exist_ok=True)
|
|
249
|
+
else:
|
|
250
|
+
tmp = tempfile.NamedTemporaryFile(delete=False)
|
|
251
|
+
dest_path = tmp.name
|
|
252
|
+
tmp.close()
|
|
253
|
+
with self.fs.open(reference, "rb") as src, open(dest_path, "wb") as out:
|
|
254
|
+
shutil.copyfileobj(src, out)
|
|
255
|
+
return dest_path
|
|
256
|
+
|
|
257
|
+
# Create Cache dir
|
|
258
|
+
content_dir = Path(self.cache_root)
|
|
259
|
+
content_dir.mkdir(parents=True, exist_ok=True)
|
|
260
|
+
|
|
261
|
+
# Create reference hash for fast lookup
|
|
262
|
+
ref_hash = xxhash.xxh64(reference.encode()).hexdigest()
|
|
263
|
+
file_dir = content_dir / ref_hash
|
|
264
|
+
file_dir.mkdir(parents=True, exist_ok=True)
|
|
265
|
+
file_path = file_dir / "data"
|
|
266
|
+
etag_path = file_dir / "etag"
|
|
267
|
+
|
|
268
|
+
# Return if etag matches
|
|
269
|
+
if file_path.exists() and etag_path.exists():
|
|
270
|
+
cached_etag = etag_path.read_text()
|
|
271
|
+
if cached_etag == remote_etag:
|
|
272
|
+
return str(file_path)
|
|
273
|
+
|
|
274
|
+
# Redownload data and write etag
|
|
275
|
+
with self.fs.open(reference, "rb") as f, open(file_path, "wb") as out:
|
|
276
|
+
shutil.copyfileobj(f, out)
|
|
277
|
+
etag_path.write_text(remote_etag)
|
|
278
|
+
|
|
279
|
+
return str(file_path)
|
|
227
280
|
|
|
228
281
|
def get(self, reference, output_path="", subdir="", required=False):
|
|
229
282
|
"""Retrieve stored object and stores it in the output path
|
|
@@ -386,7 +439,7 @@ class BaseStorage(object):
|
|
|
386
439
|
self._fs = StrictRootDirFs(
|
|
387
440
|
path=self.root_dir,
|
|
388
441
|
fs=(
|
|
389
|
-
self.fsspec_filesystem_class(**self.get_fsspec_storage_options())
|
|
442
|
+
self.fsspec_filesystem_class(**self.get_fsspec_storage_options(), asynchronous=False)
|
|
390
443
|
if self.fsspec_filesystem_class
|
|
391
444
|
else None
|
|
392
445
|
),
|
|
@@ -1,12 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: oasis-data-manager
|
|
3
|
-
Version: 0.1.6
|
|
4
|
-
Summary: UNKNOWN
|
|
3
|
+
Version: 0.2.0
|
|
5
4
|
Home-page: https://github.com/OasisLMF/OasisDataManager
|
|
6
5
|
Author: Oasis LMF
|
|
7
6
|
Author-email: support@oasislmf.org
|
|
8
|
-
License: UNKNOWN
|
|
9
|
-
Platform: UNKNOWN
|
|
10
7
|
Classifier: Development Status :: 4 - Beta
|
|
11
8
|
Classifier: Operating System :: OS Independent
|
|
12
9
|
Classifier: Programming Language :: Python
|
|
@@ -16,7 +13,8 @@ Description-Content-Type: text/markdown
|
|
|
16
13
|
Requires-Dist: fastparquet
|
|
17
14
|
Requires-Dist: fsspec>=2023.12.2
|
|
18
15
|
Requires-Dist: pandas
|
|
19
|
-
Requires-Dist:
|
|
16
|
+
Requires-Dist: typing_extensions
|
|
17
|
+
Requires-Dist: xxhash
|
|
20
18
|
Provides-Extra: extra
|
|
21
19
|
Requires-Dist: adlfs; extra == "extra"
|
|
22
20
|
Requires-Dist: boto3; extra == "extra"
|
|
@@ -27,6 +25,12 @@ Requires-Dist: distributed; extra == "extra"
|
|
|
27
25
|
Requires-Dist: geopandas==0.14.4; extra == "extra"
|
|
28
26
|
Requires-Dist: pyogrio; extra == "extra"
|
|
29
27
|
Requires-Dist: s3fs>=2023.12.2; extra == "extra"
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
28
|
+
Dynamic: author
|
|
29
|
+
Dynamic: author-email
|
|
30
|
+
Dynamic: classifier
|
|
31
|
+
Dynamic: description-content-type
|
|
32
|
+
Dynamic: home-page
|
|
33
|
+
Dynamic: keywords
|
|
34
|
+
Dynamic: provides-extra
|
|
35
|
+
Dynamic: requires-dist
|
|
36
|
+
Dynamic: requires-python
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
oasis_data_manager/__init__.py,sha256=
|
|
1
|
+
oasis_data_manager/__init__.py,sha256=FVHPBGkfhbQDi_z3v0PiKJrXXqXOx0vGW_1VaqNJi7U,22
|
|
2
2
|
oasis_data_manager/config.py,sha256=_qx2Mu5n0Jx3W5SKCiqLr1SPdWLrbFv_B82r6Eosp_k,534
|
|
3
3
|
oasis_data_manager/complex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
oasis_data_manager/complex/complex.py,sha256=8oomV9WyLsa8sz8aMzlwv4naKCGOL3UdSlYQJxUFqCk,5382
|
|
@@ -19,10 +19,10 @@ oasis_data_manager/filestore/filestore.py,sha256=eaQGAer7Q9KM4B3bq9WmZAtjFdj9aRe
|
|
|
19
19
|
oasis_data_manager/filestore/log.py,sha256=8l54LoOJiOG2pr4o93LzMocjH7dHcsOp14JWJ_MrqHQ,693
|
|
20
20
|
oasis_data_manager/filestore/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
21
|
oasis_data_manager/filestore/backends/aws_s3.py,sha256=PhzHHoAHDnBAskVfAm3Vj_18GZSyl9D0DF7qCod9_Ok,10567
|
|
22
|
-
oasis_data_manager/filestore/backends/azure_abfs.py,sha256=
|
|
23
|
-
oasis_data_manager/filestore/backends/base.py,sha256=
|
|
22
|
+
oasis_data_manager/filestore/backends/azure_abfs.py,sha256=gSB0p3PdR9jV8TY3mqpM-Vveg9TOOOXr91qLXDaVxCs,5239
|
|
23
|
+
oasis_data_manager/filestore/backends/base.py,sha256=mKi0IWWjzEsjy4o3KijFe58F2Yr1X9-SNJ8j13u1dhQ,15792
|
|
24
24
|
oasis_data_manager/filestore/backends/local.py,sha256=MEX_CvwhsDfv9lvBjc8CdaDXaN53l9onQHmOgKjoJcg,1242
|
|
25
|
-
oasis_data_manager-0.
|
|
26
|
-
oasis_data_manager-0.
|
|
27
|
-
oasis_data_manager-0.
|
|
28
|
-
oasis_data_manager-0.
|
|
25
|
+
oasis_data_manager-0.2.0.dist-info/METADATA,sha256=Epm0t_ZiKUi1kscNeEQEt-imBtw5GshejFGBaAl945E,1178
|
|
26
|
+
oasis_data_manager-0.2.0.dist-info/WHEEL,sha256=AeO2BvogYWm3eGaHCvhzmUYt8ia7KfURiHzO_1atlys,109
|
|
27
|
+
oasis_data_manager-0.2.0.dist-info/top_level.txt,sha256=qMC39T9UvDCPbNJLVtgu8h6f7c4KJYel7SnIpz62wsU,19
|
|
28
|
+
oasis_data_manager-0.2.0.dist-info/RECORD,,
|
|
File without changes
|