fsspec 2024.12.0__py3-none-any.whl → 2025.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/_version.py +2 -2
- fsspec/archive.py +3 -1
- fsspec/implementations/asyn_wrapper.py +2 -1
- fsspec/implementations/dbfs.py +3 -3
- fsspec/implementations/reference.py +99 -9
- fsspec/registry.py +3 -3
- fsspec/spec.py +3 -3
- {fsspec-2024.12.0.dist-info → fsspec-2025.2.0.dist-info}/METADATA +1 -2
- {fsspec-2024.12.0.dist-info → fsspec-2025.2.0.dist-info}/RECORD +11 -11
- {fsspec-2024.12.0.dist-info → fsspec-2025.2.0.dist-info}/WHEEL +0 -0
- {fsspec-2024.12.0.dist-info → fsspec-2025.2.0.dist-info}/licenses/LICENSE +0 -0
fsspec/_version.py
CHANGED
|
@@ -12,5 +12,5 @@ __version__: str
|
|
|
12
12
|
__version_tuple__: VERSION_TUPLE
|
|
13
13
|
version_tuple: VERSION_TUPLE
|
|
14
14
|
|
|
15
|
-
__version__ = version = '2024.12.0'
|
|
16
|
-
__version_tuple__ = version_tuple = (2024, 12, 0)
|
|
15
|
+
__version__ = version = '2025.2.0'
|
|
16
|
+
__version_tuple__ = version_tuple = (2025, 2, 0)
|
fsspec/archive.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import operator
|
|
2
|
+
|
|
1
3
|
from fsspec import AbstractFileSystem
|
|
2
4
|
from fsspec.utils import tokenize
|
|
3
5
|
|
|
@@ -67,7 +69,7 @@ class AbstractArchiveFileSystem(AbstractFileSystem):
|
|
|
67
69
|
out = {"name": ppath, "size": 0, "type": "directory"}
|
|
68
70
|
paths[ppath] = out
|
|
69
71
|
if detail:
|
|
70
|
-
out = sorted(paths.values(), key=
|
|
72
|
+
out = sorted(paths.values(), key=operator.itemgetter("name"))
|
|
71
73
|
return out
|
|
72
74
|
else:
|
|
73
75
|
return sorted(paths)
|
|
@@ -57,8 +57,9 @@ class AsyncFileSystemWrapper(AsyncFileSystem):
|
|
|
57
57
|
"""
|
|
58
58
|
Wrap all synchronous methods of the underlying filesystem with asynchronous versions.
|
|
59
59
|
"""
|
|
60
|
+
excluded_methods = {"open"}
|
|
60
61
|
for method_name in dir(self.sync_fs):
|
|
61
|
-
if method_name.startswith("_"):
|
|
62
|
+
if method_name.startswith("_") or method_name in excluded_methods:
|
|
62
63
|
continue
|
|
63
64
|
|
|
64
65
|
attr = inspect.getattr_static(self.sync_fs, method_name)
|
fsspec/implementations/dbfs.py
CHANGED
|
@@ -412,9 +412,9 @@ class DatabricksFile(AbstractBufferedFile):
|
|
|
412
412
|
if block_size is None or block_size == "default":
|
|
413
413
|
block_size = self.DEFAULT_BLOCK_SIZE
|
|
414
414
|
|
|
415
|
-
assert (
|
|
416
|
-
|
|
417
|
-
)
|
|
415
|
+
assert block_size == self.DEFAULT_BLOCK_SIZE, (
|
|
416
|
+
f"Only the default block size is allowed, not {block_size}"
|
|
417
|
+
)
|
|
418
418
|
|
|
419
419
|
super().__init__(
|
|
420
420
|
fs,
|
|
@@ -10,6 +10,7 @@ from itertools import chain
|
|
|
10
10
|
from typing import TYPE_CHECKING, Literal
|
|
11
11
|
|
|
12
12
|
import fsspec.core
|
|
13
|
+
from fsspec.spec import AbstractBufferedFile
|
|
13
14
|
|
|
14
15
|
try:
|
|
15
16
|
import ujson as json
|
|
@@ -394,10 +395,14 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
|
|
|
394
395
|
self.write(field, record)
|
|
395
396
|
else:
|
|
396
397
|
# metadata or top-level
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
398
|
+
if hasattr(value, "to_bytes"):
|
|
399
|
+
val = value.to_bytes().decode()
|
|
400
|
+
elif isinstance(value, bytes):
|
|
401
|
+
val = value.decode()
|
|
402
|
+
else:
|
|
403
|
+
val = value
|
|
404
|
+
self._items[key] = val
|
|
405
|
+
new_value = json.loads(val)
|
|
401
406
|
self.zmetadata[key] = {**self.zmetadata.get(key, {}), **new_value}
|
|
402
407
|
|
|
403
408
|
@staticmethod
|
|
@@ -595,8 +600,7 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
595
600
|
async, and must allow start and end args in _cat_file. Later versions
|
|
596
601
|
may allow multiple arbitrary URLs for the targets.
|
|
597
602
|
This FileSystem is read-only. It is designed to be used with async
|
|
598
|
-
targets (for now).
|
|
599
|
-
``open``. We do not get original file details from the target FS.
|
|
603
|
+
targets (for now). We do not get original file details from the target FS.
|
|
600
604
|
Configuration is by passing a dict of references at init, or a URL to
|
|
601
605
|
a JSON file containing the same; this dict
|
|
602
606
|
can also contain concrete data for some set of paths.
|
|
@@ -606,6 +610,7 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
606
610
|
"""
|
|
607
611
|
|
|
608
612
|
protocol = "reference"
|
|
613
|
+
cachable = False
|
|
609
614
|
|
|
610
615
|
def __init__(
|
|
611
616
|
self,
|
|
@@ -762,6 +767,11 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
762
767
|
for k, f in self.fss.items():
|
|
763
768
|
if not f.async_impl:
|
|
764
769
|
self.fss[k] = AsyncFileSystemWrapper(f)
|
|
770
|
+
elif self.asynchronous ^ f.asynchronous:
|
|
771
|
+
raise ValueError(
|
|
772
|
+
"Reference-FS's target filesystem must have same value"
|
|
773
|
+
"of asynchronous"
|
|
774
|
+
)
|
|
765
775
|
|
|
766
776
|
def _cat_common(self, path, start=None, end=None):
|
|
767
777
|
path = self._strip_protocol(path)
|
|
@@ -772,6 +782,8 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
772
782
|
raise FileNotFoundError(path) from exc
|
|
773
783
|
if isinstance(part, str):
|
|
774
784
|
part = part.encode()
|
|
785
|
+
if hasattr(part, "to_bytes"):
|
|
786
|
+
part = part.to_bytes()
|
|
775
787
|
if isinstance(part, bytes):
|
|
776
788
|
logger.debug(f"Reference: {path}, type bytes")
|
|
777
789
|
if part.startswith(b"base64:"):
|
|
@@ -1073,7 +1085,7 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
1073
1085
|
self.dircache = {"": []}
|
|
1074
1086
|
it = self.references.items()
|
|
1075
1087
|
for path, part in it:
|
|
1076
|
-
if isinstance(part, (bytes, str)):
|
|
1088
|
+
if isinstance(part, (bytes, str)) or hasattr(part, "to_bytes"):
|
|
1077
1089
|
size = len(part)
|
|
1078
1090
|
elif len(part) == 1:
|
|
1079
1091
|
size = None
|
|
@@ -1100,10 +1112,33 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
1100
1112
|
self.dircache[par].append({"name": path, "type": "file", "size": size})
|
|
1101
1113
|
|
|
1102
1114
|
def _open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs):
|
|
1103
|
-
|
|
1104
|
-
|
|
1115
|
+
part_or_url, start0, end0 = self._cat_common(path)
|
|
1116
|
+
# This logic is kept outside `ReferenceFile` to avoid unnecessary redirection.
|
|
1117
|
+
# That does mean `_cat_common` gets called twice if it eventually reaches `ReferenceFile`.
|
|
1118
|
+
if isinstance(part_or_url, bytes):
|
|
1119
|
+
return io.BytesIO(part_or_url[start0:end0])
|
|
1120
|
+
|
|
1121
|
+
protocol, _ = split_protocol(part_or_url)
|
|
1122
|
+
if start0 is None and end0 is None:
|
|
1123
|
+
return self.fss[protocol]._open(
|
|
1124
|
+
part_or_url,
|
|
1125
|
+
mode,
|
|
1126
|
+
block_size=block_size,
|
|
1127
|
+
cache_options=cache_options,
|
|
1128
|
+
**kwargs,
|
|
1129
|
+
)
|
|
1130
|
+
|
|
1131
|
+
return ReferenceFile(
|
|
1132
|
+
self,
|
|
1133
|
+
path,
|
|
1134
|
+
mode,
|
|
1135
|
+
block_size=block_size,
|
|
1136
|
+
cache_options=cache_options,
|
|
1137
|
+
**kwargs,
|
|
1138
|
+
)
|
|
1105
1139
|
|
|
1106
1140
|
def ls(self, path, detail=True, **kwargs):
|
|
1141
|
+
logger.debug("list %s", path)
|
|
1107
1142
|
path = self._strip_protocol(path)
|
|
1108
1143
|
if isinstance(self.references, LazyReferenceMapper):
|
|
1109
1144
|
try:
|
|
@@ -1214,3 +1249,58 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
1214
1249
|
out[k] = v
|
|
1215
1250
|
with fsspec.open(url, "wb", **storage_options) as f:
|
|
1216
1251
|
f.write(json.dumps({"version": 1, "refs": out}).encode())
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
class ReferenceFile(AbstractBufferedFile):
|
|
1255
|
+
def __init__(
|
|
1256
|
+
self,
|
|
1257
|
+
fs,
|
|
1258
|
+
path,
|
|
1259
|
+
mode="rb",
|
|
1260
|
+
block_size="default",
|
|
1261
|
+
autocommit=True,
|
|
1262
|
+
cache_type="readahead",
|
|
1263
|
+
cache_options=None,
|
|
1264
|
+
size=None,
|
|
1265
|
+
**kwargs,
|
|
1266
|
+
):
|
|
1267
|
+
super().__init__(
|
|
1268
|
+
fs,
|
|
1269
|
+
path,
|
|
1270
|
+
mode=mode,
|
|
1271
|
+
block_size=block_size,
|
|
1272
|
+
autocommit=autocommit,
|
|
1273
|
+
size=size,
|
|
1274
|
+
cache_type=cache_type,
|
|
1275
|
+
cache_options=cache_options,
|
|
1276
|
+
**kwargs,
|
|
1277
|
+
)
|
|
1278
|
+
part_or_url, self.start, self.end = self.fs._cat_common(self.path)
|
|
1279
|
+
protocol, _ = split_protocol(part_or_url)
|
|
1280
|
+
self.src_fs = self.fs.fss[protocol]
|
|
1281
|
+
self.src_path = part_or_url
|
|
1282
|
+
self._f = None
|
|
1283
|
+
|
|
1284
|
+
@property
|
|
1285
|
+
def f(self):
|
|
1286
|
+
if self._f is None or self._f.closed:
|
|
1287
|
+
self._f = self.src_fs._open(
|
|
1288
|
+
self.src_path,
|
|
1289
|
+
mode=self.mode,
|
|
1290
|
+
block_size=self.blocksize,
|
|
1291
|
+
autocommit=self.autocommit,
|
|
1292
|
+
cache_type="none",
|
|
1293
|
+
**self.kwargs,
|
|
1294
|
+
)
|
|
1295
|
+
return self._f
|
|
1296
|
+
|
|
1297
|
+
def close(self):
|
|
1298
|
+
if self._f is not None:
|
|
1299
|
+
self._f.close()
|
|
1300
|
+
return super().close()
|
|
1301
|
+
|
|
1302
|
+
def _fetch_range(self, start, end):
|
|
1303
|
+
start = start + self.start
|
|
1304
|
+
end = min(end + self.start, self.end)
|
|
1305
|
+
self.f.seek(start)
|
|
1306
|
+
return self.f.read(end - start)
|
fsspec/registry.py
CHANGED
|
@@ -218,9 +218,9 @@ known_implementations = {
|
|
|
218
218
|
"zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
|
|
219
219
|
}
|
|
220
220
|
|
|
221
|
-
assert list(known_implementations) == sorted(
|
|
222
|
-
|
|
223
|
-
)
|
|
221
|
+
assert list(known_implementations) == sorted(known_implementations), (
|
|
222
|
+
"Not in alphabetical order"
|
|
223
|
+
)
|
|
224
224
|
|
|
225
225
|
|
|
226
226
|
def get_filesystem_class(protocol):
|
fsspec/spec.py
CHANGED
|
@@ -382,7 +382,7 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
382
382
|
pass
|
|
383
383
|
|
|
384
384
|
def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
|
|
385
|
-
"""Return all files
|
|
385
|
+
"""Return all files under the given path.
|
|
386
386
|
|
|
387
387
|
List all files, recursing into subdirectories; output is iterator-style,
|
|
388
388
|
like ``os.walk()``. For a simple list of files, ``find()`` is available.
|
|
@@ -2131,7 +2131,7 @@ class AbstractBufferedFile(io.IOBase):
|
|
|
2131
2131
|
return b"".join(out)
|
|
2132
2132
|
|
|
2133
2133
|
def readline(self):
|
|
2134
|
-
"""Read until first occurrence of newline character
|
|
2134
|
+
"""Read until and including the first occurrence of newline character
|
|
2135
2135
|
|
|
2136
2136
|
Note that, because of character encoding, this is not necessarily a
|
|
2137
2137
|
true line ending.
|
|
@@ -2148,7 +2148,7 @@ class AbstractBufferedFile(io.IOBase):
|
|
|
2148
2148
|
return self
|
|
2149
2149
|
|
|
2150
2150
|
def readlines(self):
|
|
2151
|
-
"""Return all data, split by the newline character"""
|
|
2151
|
+
"""Return all data, split by the newline character, including the newline character"""
|
|
2152
2152
|
data = self.read()
|
|
2153
2153
|
lines = data.split(b"\n")
|
|
2154
2154
|
out = [l + b"\n" for l in lines[:-1]]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fsspec
|
|
3
|
-
Version: 2024.12.0
|
|
3
|
+
Version: 2025.2.0
|
|
4
4
|
Summary: File-system specification
|
|
5
5
|
Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
|
|
6
6
|
Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
|
|
@@ -131,7 +131,6 @@ Requires-Dist: pytest-rerunfailures; extra == 'test'
|
|
|
131
131
|
Requires-Dist: requests; extra == 'test'
|
|
132
132
|
Provides-Extra: test-downstream
|
|
133
133
|
Requires-Dist: aiobotocore<3.0.0,>=2.5.4; extra == 'test-downstream'
|
|
134
|
-
Requires-Dist: dask-expr; extra == 'test-downstream'
|
|
135
134
|
Requires-Dist: dask[dataframe,test]; extra == 'test-downstream'
|
|
136
135
|
Requires-Dist: moto[server]<5,>4; extra == 'test-downstream'
|
|
137
136
|
Requires-Dist: pytest-timeout; extra == 'test-downstream'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
fsspec/__init__.py,sha256=l9MJaNNV2d4wKpCtMvXDr55n92DkdrAayGy3F9ICjzk,1998
|
|
2
|
-
fsspec/_version.py,sha256=
|
|
3
|
-
fsspec/archive.py,sha256=
|
|
2
|
+
fsspec/_version.py,sha256=IE7d_vZlkju9WTb8xdQYMiqPyQOYnfC9HN9w8nHfkrY,417
|
|
3
|
+
fsspec/archive.py,sha256=vM6t_lgV6lBWbBYwpm3S4ofBQFQxUPr5KkDQrrQcQro,2411
|
|
4
4
|
fsspec/asyn.py,sha256=rsnCsFUmBZmKJqg9m-IDWInoQtE4wV0rGDZEXZwuU3c,36500
|
|
5
5
|
fsspec/caching.py,sha256=oHVy9zpy4Oqk5f1t3-Q31bbw0tsmfddGGKLJs__OdKA,32790
|
|
6
6
|
fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
|
|
@@ -16,19 +16,19 @@ fsspec/gui.py,sha256=xBnHL2-r0LVwhDAtnHoPpXts7jd4Z32peawCJiI-7lI,13975
|
|
|
16
16
|
fsspec/json.py,sha256=65sQ0Y7mTj33u_Y4IId5up4abQ3bAel4E4QzbKMiQSg,3826
|
|
17
17
|
fsspec/mapping.py,sha256=m2ndB_gtRBXYmNJg0Ie1-BVR75TFleHmIQBzC-yWhjU,8343
|
|
18
18
|
fsspec/parquet.py,sha256=6ibAmG527L5JNFS0VO8BDNlxHdA3bVYqdByeiFgpUVM,19448
|
|
19
|
-
fsspec/registry.py,sha256=
|
|
20
|
-
fsspec/spec.py,sha256=
|
|
19
|
+
fsspec/registry.py,sha256=QFyMiUV6fnksETJuapNplf6YjkNRIdHSOyd95IqPZe8,11473
|
|
20
|
+
fsspec/spec.py,sha256=l7ZEbgLsnrFuS-yrGl9re6ia1Yts1_10RqGV_mT-5P8,76032
|
|
21
21
|
fsspec/transaction.py,sha256=xliRG6U2Zf3khG4xcw9WiB-yAoqJSHEGK_VjHOdtgo0,2398
|
|
22
22
|
fsspec/utils.py,sha256=A11t25RnpiQ30RO6xeR0Qqlu3fGj8bnc40jg08tlYSI,22980
|
|
23
23
|
fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
24
|
fsspec/implementations/arrow.py,sha256=721Dikne_lV_0tlgk9jyKmHL6W-5MT0h2LKGvOYQTPI,8623
|
|
25
|
-
fsspec/implementations/asyn_wrapper.py,sha256=
|
|
25
|
+
fsspec/implementations/asyn_wrapper.py,sha256=gmLy2voDAH9KRxhvd24UDPiOqX_NCK-3JY9rMX7R6Is,2935
|
|
26
26
|
fsspec/implementations/cache_mapper.py,sha256=W4wlxyPxZbSp9ItJ0pYRVBMh6bw9eFypgP6kUYuuiI4,2421
|
|
27
27
|
fsspec/implementations/cache_metadata.py,sha256=pcOJYcBQY5OaC7Yhw0F3wjg08QLYApGmoISCrbs59ks,8511
|
|
28
28
|
fsspec/implementations/cached.py,sha256=KA6c4jqrGeeg8WNPLsh8FkL3KeRAQtGLzKw18vSF1CI,32820
|
|
29
29
|
fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
|
|
30
30
|
fsspec/implementations/data.py,sha256=LDLczxRh8h7x39Zjrd-GgzdQHr78yYxDlrv2C9Uxb5E,1658
|
|
31
|
-
fsspec/implementations/dbfs.py,sha256=
|
|
31
|
+
fsspec/implementations/dbfs.py,sha256=XwpotuS_ncz3XK1dkUteww9GnTja7HoY91c0m4GUfwI,15092
|
|
32
32
|
fsspec/implementations/dirfs.py,sha256=ymakitNNQ07tW76EShyw3rC9RvIDHl4gtuOhE_h1vUg,12032
|
|
33
33
|
fsspec/implementations/ftp.py,sha256=sorsczLp_2J3ukONsbZY-11sRZP6H5a3V7XXf6o6ip0,11936
|
|
34
34
|
fsspec/implementations/git.py,sha256=4SElW9U5d3k3_ITlvUAx59Yk7XLNRTqkGa2C3hCUkWM,3754
|
|
@@ -38,7 +38,7 @@ fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ib
|
|
|
38
38
|
fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
|
|
39
39
|
fsspec/implementations/local.py,sha256=YvR9b2MndSQIHszAMUkFvN65eWVbIfoGJJjAeS43ZS4,15259
|
|
40
40
|
fsspec/implementations/memory.py,sha256=cLNrK9wk97sl4Tre9uVDXWj6mEHvvVVIgaVgNA5KVIg,10527
|
|
41
|
-
fsspec/implementations/reference.py,sha256=
|
|
41
|
+
fsspec/implementations/reference.py,sha256=1VbyjAxq_8xHSQo2UV4ohuuoSAreB3OY4vjK05DnHsY,48646
|
|
42
42
|
fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
|
|
43
43
|
fsspec/implementations/smb.py,sha256=5fhu8h06nOLBPh2c48aT7WBRqh9cEcbIwtyu06wTjec,15236
|
|
44
44
|
fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
|
|
@@ -52,7 +52,7 @@ fsspec/tests/abstract/mv.py,sha256=k8eUEBIrRrGMsBY5OOaDXdGnQUKGwDIfQyduB6YD3Ns,1
|
|
|
52
52
|
fsspec/tests/abstract/open.py,sha256=Fi2PBPYLbRqysF8cFm0rwnB41kMdQVYjq8cGyDXp3BU,329
|
|
53
53
|
fsspec/tests/abstract/pipe.py,sha256=LFzIrLCB5GLXf9rzFKJmE8AdG7LQ_h4bJo70r8FLPqM,402
|
|
54
54
|
fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
|
|
55
|
-
fsspec-
|
|
56
|
-
fsspec-
|
|
57
|
-
fsspec-
|
|
58
|
-
fsspec-
|
|
55
|
+
fsspec-2025.2.0.dist-info/METADATA,sha256=GqrJBTBVJNmkbcEfTOamzq2KqYYbc9vRM0jpSWTZT04,11747
|
|
56
|
+
fsspec-2025.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
57
|
+
fsspec-2025.2.0.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
|
|
58
|
+
fsspec-2025.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|