oasis-data-manager 0.1.6__py2.py3-none-any.whl → 0.2.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- __version__ = '0.1.6'
1
+ __version__ = '0.2.0'
@@ -114,24 +114,21 @@ class AzureABFSStorage(BaseStorage):
114
114
  def connection_string(self):
115
115
  if self._connection_string:
116
116
  return self._connection_string
117
- else:
118
- fsspec_storage_options = {
119
- "anon": not self.account_key,
120
- "account_name": self.account_name,
121
- "account_key": self.account_key,
122
- "use_ssl": self.azure_ssl,
123
- }
124
- fs = self.fsspec_filesystem_class(**fsspec_storage_options)
125
-
126
- cs = ""
127
- if self.endpoint_url:
128
- cs += f"BlobEndpoint={self.endpoint_url};"
129
- if fs.account_name:
130
- cs += f"AccountName={fs.account_name};"
131
- if fs.account_key:
132
- cs += f"AccountKey={fs.account_key};"
133
-
134
- return cs
117
+
118
+ cs_parts = [
119
+ f"DefaultEndpointsProtocol={'https' if self.azure_ssl else 'http'}",
120
+ f"AccountName={self.account_name}",
121
+ f"AccountKey={self.account_key}"
122
+ ]
123
+
124
+ if self.endpoint_url:
125
+ # Azurite requires the account name in the endpoint path
126
+ endpoint = self.endpoint_url.rstrip('/')
127
+ if self.account_name not in endpoint:
128
+ endpoint = f"{endpoint}/{self.account_name}"
129
+ cs_parts.append(f"BlobEndpoint={endpoint};")
130
+
131
+ return ";".join(cs_parts)
135
132
 
136
133
  def get_storage_url(self, filename=None, suffix="tar.gz", encode_params=True):
137
134
  filename = (
@@ -1,6 +1,4 @@
1
- import base64
2
1
  import contextlib
3
- import io
4
2
  import logging
5
3
  import os
6
4
  import shutil
@@ -16,6 +14,7 @@ import fsspec
16
14
  from fsspec.implementations.dirfs import DirFileSystem
17
15
 
18
16
  from oasis_data_manager.errors import OasisException
17
+ import xxhash
19
18
 
20
19
  LOG_FILE_SUFFIX = "txt"
21
20
  ARCHIVE_FILE_SUFFIX = "tar.gz"
@@ -191,39 +190,93 @@ class BaseStorage(object):
191
190
  :return: Absolute filepath to stored Object
192
191
  :rtype str
193
192
  """
194
- # null ref given
195
193
  if not reference:
196
194
  if required:
197
195
  raise MissingInputsException(reference)
198
- else:
199
- return None
196
+ return None
200
197
 
201
- # check if the file is in the cache, if so return that path
202
- cache_filename = base64.b64encode(reference.encode()).decode()
203
- if self.cache_root:
204
- cached_file = os.path.join(self.cache_root, cache_filename)
205
- else:
206
- os.makedirs(os.path.dirname(no_cache_target), exist_ok=True)
207
- cached_file = no_cache_target
208
-
209
- if self.cache_root:
210
- if os.path.exists(cached_file):
211
- logging.info("Get from Cache: {}".format(reference))
212
- return cached_file
213
-
214
- if self._is_valid_url(reference):
215
- # if the file is not in the path, and is a url download it to the cache
216
- response = urlopen(reference)
217
- fdata = response.read()
218
-
219
- with io.open(cached_file, "w+b") as f:
220
- f.write(fdata)
221
- logging.info("Get from URL: {}".format(reference))
198
+ fs_protocol = getattr(self.fs.fs, "protocol", None)
199
+
200
+ # Normalize protocol into a tuple of strings
201
+ if isinstance(fs_protocol, str):
202
+ protocols = (fs_protocol,)
203
+ elif isinstance(fs_protocol, (list, tuple)):
204
+ protocols = tuple(fs_protocol)
222
205
  else:
223
- # otherwise get it from the storage and add it to the cache
224
- self.fs.get(reference, cached_file, recursive=True)
206
+ protocols = ()
207
+
208
+ enable_etag_cache = (
209
+ self.cache_root
210
+ and not self._is_valid_url(reference)
211
+ and any(p in ("s3", "s3a", "az", "abfs", "abfss") for p in protocols)
212
+ )
225
213
 
226
- return cached_file
214
+ # No cache root configured, just return data
215
+ if not enable_etag_cache:
216
+ if not no_cache_target:
217
+ raise OasisException("Error: caching disabled for this filesystem and no_cache_target not provided")
218
+ Path(no_cache_target).parent.mkdir(parents=True, exist_ok=True)
219
+ if self._is_valid_url(reference):
220
+ with urlopen(reference, timeout=30) as r:
221
+ data = r.read()
222
+ with open(no_cache_target, "wb") as f:
223
+ f.write(data)
224
+ logging.info("Get from URL: {}".format(reference))
225
+ else:
226
+ self.fs.get(reference, no_cache_target, recursive=True)
227
+ logging.info("Get from Filestore: {}".format(reference))
228
+ return no_cache_target
229
+
230
+ # Caching enabled
231
+ # Get metadata
232
+ try:
233
+ info = self.fs.info(reference)
234
+ except FileNotFoundError:
235
+ if required:
236
+ raise MissingInputsException(reference)
237
+ return None
238
+
239
+ # Raise error if type is not file
240
+ if info.get("type") == "directory":
241
+ raise OasisException(f"Directories are not supported in get_from_cache: {reference}")
242
+
243
+ remote_etag = info.get("ETag") or info.get("etag")
244
+ if remote_etag is None:
245
+ self.logger.warning(f"ETag missing for {reference} — skipping cache and returning fresh download")
246
+ if no_cache_target is not None:
247
+ dest_path = no_cache_target
248
+ Path(dest_path).parent.mkdir(parents=True, exist_ok=True)
249
+ else:
250
+ tmp = tempfile.NamedTemporaryFile(delete=False)
251
+ dest_path = tmp.name
252
+ tmp.close()
253
+ with self.fs.open(reference, "rb") as src, open(dest_path, "wb") as out:
254
+ shutil.copyfileobj(src, out)
255
+ return dest_path
256
+
257
+ # Create Cache dir
258
+ content_dir = Path(self.cache_root)
259
+ content_dir.mkdir(parents=True, exist_ok=True)
260
+
261
+ # Create reference hash for fast lookup
262
+ ref_hash = xxhash.xxh64(reference.encode()).hexdigest()
263
+ file_dir = content_dir / ref_hash
264
+ file_dir.mkdir(parents=True, exist_ok=True)
265
+ file_path = file_dir / "data"
266
+ etag_path = file_dir / "etag"
267
+
268
+ # Return if etag matches
269
+ if file_path.exists() and etag_path.exists():
270
+ cached_etag = etag_path.read_text()
271
+ if cached_etag == remote_etag:
272
+ return str(file_path)
273
+
274
+ # Redownload data and write etag
275
+ with self.fs.open(reference, "rb") as f, open(file_path, "wb") as out:
276
+ shutil.copyfileobj(f, out)
277
+ etag_path.write_text(remote_etag)
278
+
279
+ return str(file_path)
227
280
 
228
281
  def get(self, reference, output_path="", subdir="", required=False):
229
282
  """Retrieve stored object and stores it in the output path
@@ -386,7 +439,7 @@ class BaseStorage(object):
386
439
  self._fs = StrictRootDirFs(
387
440
  path=self.root_dir,
388
441
  fs=(
389
- self.fsspec_filesystem_class(**self.get_fsspec_storage_options())
442
+ self.fsspec_filesystem_class(**self.get_fsspec_storage_options(), asynchronous=False)
390
443
  if self.fsspec_filesystem_class
391
444
  else None
392
445
  ),
@@ -1,12 +1,9 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: oasis-data-manager
3
- Version: 0.1.6
4
- Summary: UNKNOWN
3
+ Version: 0.2.0
5
4
  Home-page: https://github.com/OasisLMF/OasisDataManager
6
5
  Author: Oasis LMF
7
6
  Author-email: support@oasislmf.org
8
- License: UNKNOWN
9
- Platform: UNKNOWN
10
7
  Classifier: Development Status :: 4 - Beta
11
8
  Classifier: Operating System :: OS Independent
12
9
  Classifier: Programming Language :: Python
@@ -16,7 +13,8 @@ Description-Content-Type: text/markdown
16
13
  Requires-Dist: fastparquet
17
14
  Requires-Dist: fsspec>=2023.12.2
18
15
  Requires-Dist: pandas
19
- Requires-Dist: typing-extensions
16
+ Requires-Dist: typing_extensions
17
+ Requires-Dist: xxhash
20
18
  Provides-Extra: extra
21
19
  Requires-Dist: adlfs; extra == "extra"
22
20
  Requires-Dist: boto3; extra == "extra"
@@ -27,6 +25,12 @@ Requires-Dist: distributed; extra == "extra"
27
25
  Requires-Dist: geopandas==0.14.4; extra == "extra"
28
26
  Requires-Dist: pyogrio; extra == "extra"
29
27
  Requires-Dist: s3fs>=2023.12.2; extra == "extra"
30
-
31
- UNKNOWN
32
-
28
+ Dynamic: author
29
+ Dynamic: author-email
30
+ Dynamic: classifier
31
+ Dynamic: description-content-type
32
+ Dynamic: home-page
33
+ Dynamic: keywords
34
+ Dynamic: provides-extra
35
+ Dynamic: requires-dist
36
+ Dynamic: requires-python
@@ -1,4 +1,4 @@
1
- oasis_data_manager/__init__.py,sha256=gW5NUxwGdPsiQjn0cOuuQT11pfthByI5DITDg_HMhLQ,22
1
+ oasis_data_manager/__init__.py,sha256=FVHPBGkfhbQDi_z3v0PiKJrXXqXOx0vGW_1VaqNJi7U,22
2
2
  oasis_data_manager/config.py,sha256=_qx2Mu5n0Jx3W5SKCiqLr1SPdWLrbFv_B82r6Eosp_k,534
3
3
  oasis_data_manager/complex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  oasis_data_manager/complex/complex.py,sha256=8oomV9WyLsa8sz8aMzlwv4naKCGOL3UdSlYQJxUFqCk,5382
@@ -19,10 +19,10 @@ oasis_data_manager/filestore/filestore.py,sha256=eaQGAer7Q9KM4B3bq9WmZAtjFdj9aRe
19
19
  oasis_data_manager/filestore/log.py,sha256=8l54LoOJiOG2pr4o93LzMocjH7dHcsOp14JWJ_MrqHQ,693
20
20
  oasis_data_manager/filestore/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  oasis_data_manager/filestore/backends/aws_s3.py,sha256=PhzHHoAHDnBAskVfAm3Vj_18GZSyl9D0DF7qCod9_Ok,10567
22
- oasis_data_manager/filestore/backends/azure_abfs.py,sha256=IFHByChXB1AHbesCHZUHqIqbPvWi7X06rzREIc0BKeo,5315
23
- oasis_data_manager/filestore/backends/base.py,sha256=O-JmKpUKiXIsqR_PBmMT4_NNBYMzsp9r04q8u4iWjp0,13597
22
+ oasis_data_manager/filestore/backends/azure_abfs.py,sha256=gSB0p3PdR9jV8TY3mqpM-Vveg9TOOOXr91qLXDaVxCs,5239
23
+ oasis_data_manager/filestore/backends/base.py,sha256=mKi0IWWjzEsjy4o3KijFe58F2Yr1X9-SNJ8j13u1dhQ,15792
24
24
  oasis_data_manager/filestore/backends/local.py,sha256=MEX_CvwhsDfv9lvBjc8CdaDXaN53l9onQHmOgKjoJcg,1242
25
- oasis_data_manager-0.1.6.dist-info/METADATA,sha256=7Syo6SMizc0Y5Q2MA21J3pMwKfmfi1SoreWDNi-dWsE,1017
26
- oasis_data_manager-0.1.6.dist-info/WHEEL,sha256=Kh9pAotZVRFj97E15yTA4iADqXdQfIVTHcNaZTjxeGM,110
27
- oasis_data_manager-0.1.6.dist-info/top_level.txt,sha256=qMC39T9UvDCPbNJLVtgu8h6f7c4KJYel7SnIpz62wsU,19
28
- oasis_data_manager-0.1.6.dist-info/RECORD,,
25
+ oasis_data_manager-0.2.0.dist-info/METADATA,sha256=Epm0t_ZiKUi1kscNeEQEt-imBtw5GshejFGBaAl945E,1178
26
+ oasis_data_manager-0.2.0.dist-info/WHEEL,sha256=AeO2BvogYWm3eGaHCvhzmUYt8ia7KfURiHzO_1atlys,109
27
+ oasis_data_manager-0.2.0.dist-info/top_level.txt,sha256=qMC39T9UvDCPbNJLVtgu8h6f7c4KJYel7SnIpz62wsU,19
28
+ oasis_data_manager-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.45.1)
2
+ Generator: setuptools (79.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py2-none-any
5
5
  Tag: py3-none-any