fsspec 2024.12.0__py3-none-any.whl → 2025.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fsspec/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '2024.12.0'
16
- __version_tuple__ = version_tuple = (2024, 12, 0)
15
+ __version__ = version = '2025.2.0'
16
+ __version_tuple__ = version_tuple = (2025, 2, 0)
fsspec/archive.py CHANGED
@@ -1,3 +1,5 @@
1
+ import operator
2
+
1
3
  from fsspec import AbstractFileSystem
2
4
  from fsspec.utils import tokenize
3
5
 
@@ -67,7 +69,7 @@ class AbstractArchiveFileSystem(AbstractFileSystem):
67
69
  out = {"name": ppath, "size": 0, "type": "directory"}
68
70
  paths[ppath] = out
69
71
  if detail:
70
- out = sorted(paths.values(), key=lambda _: _["name"])
72
+ out = sorted(paths.values(), key=operator.itemgetter("name"))
71
73
  return out
72
74
  else:
73
75
  return sorted(paths)
@@ -57,8 +57,9 @@ class AsyncFileSystemWrapper(AsyncFileSystem):
57
57
  """
58
58
  Wrap all synchronous methods of the underlying filesystem with asynchronous versions.
59
59
  """
60
+ excluded_methods = {"open"}
60
61
  for method_name in dir(self.sync_fs):
61
- if method_name.startswith("_"):
62
+ if method_name.startswith("_") or method_name in excluded_methods:
62
63
  continue
63
64
 
64
65
  attr = inspect.getattr_static(self.sync_fs, method_name)
@@ -412,9 +412,9 @@ class DatabricksFile(AbstractBufferedFile):
412
412
  if block_size is None or block_size == "default":
413
413
  block_size = self.DEFAULT_BLOCK_SIZE
414
414
 
415
- assert (
416
- block_size == self.DEFAULT_BLOCK_SIZE
417
- ), f"Only the default block size is allowed, not {block_size}"
415
+ assert block_size == self.DEFAULT_BLOCK_SIZE, (
416
+ f"Only the default block size is allowed, not {block_size}"
417
+ )
418
418
 
419
419
  super().__init__(
420
420
  fs,
@@ -10,6 +10,7 @@ from itertools import chain
10
10
  from typing import TYPE_CHECKING, Literal
11
11
 
12
12
  import fsspec.core
13
+ from fsspec.spec import AbstractBufferedFile
13
14
 
14
15
  try:
15
16
  import ujson as json
@@ -394,10 +395,14 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
394
395
  self.write(field, record)
395
396
  else:
396
397
  # metadata or top-level
397
- self._items[key] = value
398
- new_value = json.loads(
399
- value.decode() if isinstance(value, bytes) else value
400
- )
398
+ if hasattr(value, "to_bytes"):
399
+ val = value.to_bytes().decode()
400
+ elif isinstance(value, bytes):
401
+ val = value.decode()
402
+ else:
403
+ val = value
404
+ self._items[key] = val
405
+ new_value = json.loads(val)
401
406
  self.zmetadata[key] = {**self.zmetadata.get(key, {}), **new_value}
402
407
 
403
408
  @staticmethod
@@ -595,8 +600,7 @@ class ReferenceFileSystem(AsyncFileSystem):
595
600
  async, and must allow start and end args in _cat_file. Later versions
596
601
  may allow multiple arbitrary URLs for the targets.
597
602
  This FileSystem is read-only. It is designed to be used with async
598
- targets (for now). This FileSystem only allows whole-file access, no
599
- ``open``. We do not get original file details from the target FS.
603
+ targets (for now). We do not get original file details from the target FS.
600
604
  Configuration is by passing a dict of references at init, or a URL to
601
605
  a JSON file containing the same; this dict
602
606
  can also contain concrete data for some set of paths.
@@ -606,6 +610,7 @@ class ReferenceFileSystem(AsyncFileSystem):
606
610
  """
607
611
 
608
612
  protocol = "reference"
613
+ cachable = False
609
614
 
610
615
  def __init__(
611
616
  self,
@@ -762,6 +767,11 @@ class ReferenceFileSystem(AsyncFileSystem):
762
767
  for k, f in self.fss.items():
763
768
  if not f.async_impl:
764
769
  self.fss[k] = AsyncFileSystemWrapper(f)
770
+ elif self.asynchronous ^ f.asynchronous:
771
+ raise ValueError(
772
+ "Reference-FS's target filesystem must have same value"
773
+ "of asynchronous"
774
+ )
765
775
 
766
776
  def _cat_common(self, path, start=None, end=None):
767
777
  path = self._strip_protocol(path)
@@ -772,6 +782,8 @@ class ReferenceFileSystem(AsyncFileSystem):
772
782
  raise FileNotFoundError(path) from exc
773
783
  if isinstance(part, str):
774
784
  part = part.encode()
785
+ if hasattr(part, "to_bytes"):
786
+ part = part.to_bytes()
775
787
  if isinstance(part, bytes):
776
788
  logger.debug(f"Reference: {path}, type bytes")
777
789
  if part.startswith(b"base64:"):
@@ -1073,7 +1085,7 @@ class ReferenceFileSystem(AsyncFileSystem):
1073
1085
  self.dircache = {"": []}
1074
1086
  it = self.references.items()
1075
1087
  for path, part in it:
1076
- if isinstance(part, (bytes, str)):
1088
+ if isinstance(part, (bytes, str)) or hasattr(part, "to_bytes"):
1077
1089
  size = len(part)
1078
1090
  elif len(part) == 1:
1079
1091
  size = None
@@ -1100,10 +1112,33 @@ class ReferenceFileSystem(AsyncFileSystem):
1100
1112
  self.dircache[par].append({"name": path, "type": "file", "size": size})
1101
1113
 
1102
1114
  def _open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs):
1103
- data = self.cat_file(path) # load whole chunk into memory
1104
- return io.BytesIO(data)
1115
+ part_or_url, start0, end0 = self._cat_common(path)
1116
+ # This logic is kept outside `ReferenceFile` to avoid unnecessary redirection.
1117
+ # That does mean `_cat_common` gets called twice if it eventually reaches `ReferenceFile`.
1118
+ if isinstance(part_or_url, bytes):
1119
+ return io.BytesIO(part_or_url[start0:end0])
1120
+
1121
+ protocol, _ = split_protocol(part_or_url)
1122
+ if start0 is None and end0 is None:
1123
+ return self.fss[protocol]._open(
1124
+ part_or_url,
1125
+ mode,
1126
+ block_size=block_size,
1127
+ cache_options=cache_options,
1128
+ **kwargs,
1129
+ )
1130
+
1131
+ return ReferenceFile(
1132
+ self,
1133
+ path,
1134
+ mode,
1135
+ block_size=block_size,
1136
+ cache_options=cache_options,
1137
+ **kwargs,
1138
+ )
1105
1139
 
1106
1140
  def ls(self, path, detail=True, **kwargs):
1141
+ logger.debug("list %s", path)
1107
1142
  path = self._strip_protocol(path)
1108
1143
  if isinstance(self.references, LazyReferenceMapper):
1109
1144
  try:
@@ -1214,3 +1249,58 @@ class ReferenceFileSystem(AsyncFileSystem):
1214
1249
  out[k] = v
1215
1250
  with fsspec.open(url, "wb", **storage_options) as f:
1216
1251
  f.write(json.dumps({"version": 1, "refs": out}).encode())
1252
+
1253
+
1254
+ class ReferenceFile(AbstractBufferedFile):
1255
+ def __init__(
1256
+ self,
1257
+ fs,
1258
+ path,
1259
+ mode="rb",
1260
+ block_size="default",
1261
+ autocommit=True,
1262
+ cache_type="readahead",
1263
+ cache_options=None,
1264
+ size=None,
1265
+ **kwargs,
1266
+ ):
1267
+ super().__init__(
1268
+ fs,
1269
+ path,
1270
+ mode=mode,
1271
+ block_size=block_size,
1272
+ autocommit=autocommit,
1273
+ size=size,
1274
+ cache_type=cache_type,
1275
+ cache_options=cache_options,
1276
+ **kwargs,
1277
+ )
1278
+ part_or_url, self.start, self.end = self.fs._cat_common(self.path)
1279
+ protocol, _ = split_protocol(part_or_url)
1280
+ self.src_fs = self.fs.fss[protocol]
1281
+ self.src_path = part_or_url
1282
+ self._f = None
1283
+
1284
+ @property
1285
+ def f(self):
1286
+ if self._f is None or self._f.closed:
1287
+ self._f = self.src_fs._open(
1288
+ self.src_path,
1289
+ mode=self.mode,
1290
+ block_size=self.blocksize,
1291
+ autocommit=self.autocommit,
1292
+ cache_type="none",
1293
+ **self.kwargs,
1294
+ )
1295
+ return self._f
1296
+
1297
+ def close(self):
1298
+ if self._f is not None:
1299
+ self._f.close()
1300
+ return super().close()
1301
+
1302
+ def _fetch_range(self, start, end):
1303
+ start = start + self.start
1304
+ end = min(end + self.start, self.end)
1305
+ self.f.seek(start)
1306
+ return self.f.read(end - start)
fsspec/registry.py CHANGED
@@ -218,9 +218,9 @@ known_implementations = {
218
218
  "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
219
219
  }
220
220
 
221
- assert list(known_implementations) == sorted(
222
- known_implementations
223
- ), "Not in alphabetical order"
221
+ assert list(known_implementations) == sorted(known_implementations), (
222
+ "Not in alphabetical order"
223
+ )
224
224
 
225
225
 
226
226
  def get_filesystem_class(protocol):
fsspec/spec.py CHANGED
@@ -382,7 +382,7 @@ class AbstractFileSystem(metaclass=_Cached):
382
382
  pass
383
383
 
384
384
  def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
385
- """Return all files belows path
385
+ """Return all files under the given path.
386
386
 
387
387
  List all files, recursing into subdirectories; output is iterator-style,
388
388
  like ``os.walk()``. For a simple list of files, ``find()`` is available.
@@ -2131,7 +2131,7 @@ class AbstractBufferedFile(io.IOBase):
2131
2131
  return b"".join(out)
2132
2132
 
2133
2133
  def readline(self):
2134
- """Read until first occurrence of newline character
2134
+ """Read until and including the first occurrence of newline character
2135
2135
 
2136
2136
  Note that, because of character encoding, this is not necessarily a
2137
2137
  true line ending.
@@ -2148,7 +2148,7 @@ class AbstractBufferedFile(io.IOBase):
2148
2148
  return self
2149
2149
 
2150
2150
  def readlines(self):
2151
- """Return all data, split by the newline character"""
2151
+ """Return all data, split by the newline character, including the newline character"""
2152
2152
  data = self.read()
2153
2153
  lines = data.split(b"\n")
2154
2154
  out = [l + b"\n" for l in lines[:-1]]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fsspec
3
- Version: 2024.12.0
3
+ Version: 2025.2.0
4
4
  Summary: File-system specification
5
5
  Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
6
6
  Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
@@ -131,7 +131,6 @@ Requires-Dist: pytest-rerunfailures; extra == 'test'
131
131
  Requires-Dist: requests; extra == 'test'
132
132
  Provides-Extra: test-downstream
133
133
  Requires-Dist: aiobotocore<3.0.0,>=2.5.4; extra == 'test-downstream'
134
- Requires-Dist: dask-expr; extra == 'test-downstream'
135
134
  Requires-Dist: dask[dataframe,test]; extra == 'test-downstream'
136
135
  Requires-Dist: moto[server]<5,>4; extra == 'test-downstream'
137
136
  Requires-Dist: pytest-timeout; extra == 'test-downstream'
@@ -1,6 +1,6 @@
1
1
  fsspec/__init__.py,sha256=l9MJaNNV2d4wKpCtMvXDr55n92DkdrAayGy3F9ICjzk,1998
2
- fsspec/_version.py,sha256=wQ2VhCCZZrkDgAic2RGrr4PbmFQdFL4PTIWVnK3r9tM,419
3
- fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
2
+ fsspec/_version.py,sha256=IE7d_vZlkju9WTb8xdQYMiqPyQOYnfC9HN9w8nHfkrY,417
3
+ fsspec/archive.py,sha256=vM6t_lgV6lBWbBYwpm3S4ofBQFQxUPr5KkDQrrQcQro,2411
4
4
  fsspec/asyn.py,sha256=rsnCsFUmBZmKJqg9m-IDWInoQtE4wV0rGDZEXZwuU3c,36500
5
5
  fsspec/caching.py,sha256=oHVy9zpy4Oqk5f1t3-Q31bbw0tsmfddGGKLJs__OdKA,32790
6
6
  fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
@@ -16,19 +16,19 @@ fsspec/gui.py,sha256=xBnHL2-r0LVwhDAtnHoPpXts7jd4Z32peawCJiI-7lI,13975
16
16
  fsspec/json.py,sha256=65sQ0Y7mTj33u_Y4IId5up4abQ3bAel4E4QzbKMiQSg,3826
17
17
  fsspec/mapping.py,sha256=m2ndB_gtRBXYmNJg0Ie1-BVR75TFleHmIQBzC-yWhjU,8343
18
18
  fsspec/parquet.py,sha256=6ibAmG527L5JNFS0VO8BDNlxHdA3bVYqdByeiFgpUVM,19448
19
- fsspec/registry.py,sha256=A2r3PiZd17192sGHLwWNFbK8RFiDA7gSbfboIJ07wTY,11471
20
- fsspec/spec.py,sha256=d_NY5YVuwV7YCRduKkaR_z8B9GUna4-H9mOinymEMFY,75971
19
+ fsspec/registry.py,sha256=QFyMiUV6fnksETJuapNplf6YjkNRIdHSOyd95IqPZe8,11473
20
+ fsspec/spec.py,sha256=l7ZEbgLsnrFuS-yrGl9re6ia1Yts1_10RqGV_mT-5P8,76032
21
21
  fsspec/transaction.py,sha256=xliRG6U2Zf3khG4xcw9WiB-yAoqJSHEGK_VjHOdtgo0,2398
22
22
  fsspec/utils.py,sha256=A11t25RnpiQ30RO6xeR0Qqlu3fGj8bnc40jg08tlYSI,22980
23
23
  fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  fsspec/implementations/arrow.py,sha256=721Dikne_lV_0tlgk9jyKmHL6W-5MT0h2LKGvOYQTPI,8623
25
- fsspec/implementations/asyn_wrapper.py,sha256=cXfSkF2AaboInIIA_6jmB796RP_BXd8u08loPAHQsxQ,2864
25
+ fsspec/implementations/asyn_wrapper.py,sha256=gmLy2voDAH9KRxhvd24UDPiOqX_NCK-3JY9rMX7R6Is,2935
26
26
  fsspec/implementations/cache_mapper.py,sha256=W4wlxyPxZbSp9ItJ0pYRVBMh6bw9eFypgP6kUYuuiI4,2421
27
27
  fsspec/implementations/cache_metadata.py,sha256=pcOJYcBQY5OaC7Yhw0F3wjg08QLYApGmoISCrbs59ks,8511
28
28
  fsspec/implementations/cached.py,sha256=KA6c4jqrGeeg8WNPLsh8FkL3KeRAQtGLzKw18vSF1CI,32820
29
29
  fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
30
30
  fsspec/implementations/data.py,sha256=LDLczxRh8h7x39Zjrd-GgzdQHr78yYxDlrv2C9Uxb5E,1658
31
- fsspec/implementations/dbfs.py,sha256=a0eNjLxyfFK7pbEa52U8K-PhNHukzdGVx1eLcVniaXY,15092
31
+ fsspec/implementations/dbfs.py,sha256=XwpotuS_ncz3XK1dkUteww9GnTja7HoY91c0m4GUfwI,15092
32
32
  fsspec/implementations/dirfs.py,sha256=ymakitNNQ07tW76EShyw3rC9RvIDHl4gtuOhE_h1vUg,12032
33
33
  fsspec/implementations/ftp.py,sha256=sorsczLp_2J3ukONsbZY-11sRZP6H5a3V7XXf6o6ip0,11936
34
34
  fsspec/implementations/git.py,sha256=4SElW9U5d3k3_ITlvUAx59Yk7XLNRTqkGa2C3hCUkWM,3754
@@ -38,7 +38,7 @@ fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ib
38
38
  fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
39
39
  fsspec/implementations/local.py,sha256=YvR9b2MndSQIHszAMUkFvN65eWVbIfoGJJjAeS43ZS4,15259
40
40
  fsspec/implementations/memory.py,sha256=cLNrK9wk97sl4Tre9uVDXWj6mEHvvVVIgaVgNA5KVIg,10527
41
- fsspec/implementations/reference.py,sha256=E-XYtnsHpE1e4x2io1ILOioGsWpCLDH1bqYN3QPGUJI,45930
41
+ fsspec/implementations/reference.py,sha256=1VbyjAxq_8xHSQo2UV4ohuuoSAreB3OY4vjK05DnHsY,48646
42
42
  fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
43
43
  fsspec/implementations/smb.py,sha256=5fhu8h06nOLBPh2c48aT7WBRqh9cEcbIwtyu06wTjec,15236
44
44
  fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
@@ -52,7 +52,7 @@ fsspec/tests/abstract/mv.py,sha256=k8eUEBIrRrGMsBY5OOaDXdGnQUKGwDIfQyduB6YD3Ns,1
52
52
  fsspec/tests/abstract/open.py,sha256=Fi2PBPYLbRqysF8cFm0rwnB41kMdQVYjq8cGyDXp3BU,329
53
53
  fsspec/tests/abstract/pipe.py,sha256=LFzIrLCB5GLXf9rzFKJmE8AdG7LQ_h4bJo70r8FLPqM,402
54
54
  fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
55
- fsspec-2024.12.0.dist-info/METADATA,sha256=bZRSxMVT7oHOUheJQEDMsChBFuby1pTg8WUbnLYATrc,11801
56
- fsspec-2024.12.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
57
- fsspec-2024.12.0.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
58
- fsspec-2024.12.0.dist-info/RECORD,,
55
+ fsspec-2025.2.0.dist-info/METADATA,sha256=GqrJBTBVJNmkbcEfTOamzq2KqYYbc9vRM0jpSWTZT04,11747
56
+ fsspec-2025.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
57
+ fsspec-2025.2.0.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
58
+ fsspec-2025.2.0.dist-info/RECORD,,