fsspec 2023.6.0__py3-none-any.whl → 2023.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/_version.py +3 -3
- fsspec/asyn.py +154 -92
- fsspec/caching.py +1 -1
- fsspec/compression.py +7 -2
- fsspec/core.py +16 -8
- fsspec/generic.py +111 -17
- fsspec/gui.py +4 -2
- fsspec/implementations/cache_mapper.py +80 -0
- fsspec/implementations/cache_metadata.py +232 -0
- fsspec/implementations/cached.py +74 -157
- fsspec/implementations/dirfs.py +3 -1
- fsspec/implementations/http.py +36 -19
- fsspec/implementations/local.py +4 -21
- fsspec/implementations/memory.py +8 -9
- fsspec/implementations/reference.py +8 -8
- fsspec/implementations/sftp.py +6 -2
- fsspec/implementations/smb.py +39 -23
- fsspec/mapping.py +8 -0
- fsspec/registry.py +22 -0
- fsspec/spec.py +164 -96
- fsspec/tests/abstract/__init__.py +147 -0
- fsspec/tests/abstract/common.py +175 -0
- fsspec/tests/abstract/copy.py +250 -56
- fsspec/tests/abstract/get.py +248 -38
- fsspec/tests/abstract/put.py +246 -66
- fsspec/utils.py +25 -8
- {fsspec-2023.6.0.dist-info → fsspec-2023.9.1.dist-info}/METADATA +1 -1
- fsspec-2023.9.1.dist-info/RECORD +54 -0
- fsspec-2023.6.0.dist-info/RECORD +0 -51
- {fsspec-2023.6.0.dist-info → fsspec-2023.9.1.dist-info}/LICENSE +0 -0
- {fsspec-2023.6.0.dist-info → fsspec-2023.9.1.dist-info}/WHEEL +0 -0
- {fsspec-2023.6.0.dist-info → fsspec-2023.9.1.dist-info}/top_level.txt +0 -0
|
@@ -150,7 +150,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
|
|
|
150
150
|
"""List top-level directories"""
|
|
151
151
|
if self.dirs is None:
|
|
152
152
|
dirs = [p.split("/", 1)[0] for p in self.zmetadata]
|
|
153
|
-
self.dirs =
|
|
153
|
+
self.dirs = {p for p in dirs if p and not p.startswith(".")}
|
|
154
154
|
listing = self.dirs
|
|
155
155
|
if basename:
|
|
156
156
|
listing = [os.path.basename(path) for path in listing]
|
|
@@ -381,17 +381,17 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
|
|
|
381
381
|
raws[j] = kerchunk.df._proc_raw(data)
|
|
382
382
|
# TODO: only save needed columns
|
|
383
383
|
df = pd.DataFrame(
|
|
384
|
-
|
|
385
|
-
path
|
|
386
|
-
offset
|
|
387
|
-
size
|
|
388
|
-
raw
|
|
389
|
-
|
|
384
|
+
{
|
|
385
|
+
"path": paths,
|
|
386
|
+
"offset": offsets,
|
|
387
|
+
"size": sizes,
|
|
388
|
+
"raw": raws,
|
|
389
|
+
},
|
|
390
390
|
copy=False,
|
|
391
391
|
)
|
|
392
392
|
if df.path.count() / (df.path.nunique() or 1) > self.cat_thresh:
|
|
393
393
|
df["path"] = df["path"].astype("category")
|
|
394
|
-
object_encoding =
|
|
394
|
+
object_encoding = {"raw": "bytes", "path": "utf8"}
|
|
395
395
|
has_nulls = ["path", "raw"]
|
|
396
396
|
|
|
397
397
|
self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)
|
fsspec/implementations/sftp.py
CHANGED
|
@@ -110,8 +110,12 @@ class SFTPFileSystem(AbstractFileSystem):
|
|
|
110
110
|
"type": t,
|
|
111
111
|
"uid": stat.st_uid,
|
|
112
112
|
"gid": stat.st_gid,
|
|
113
|
-
"time": datetime.datetime.
|
|
114
|
-
|
|
113
|
+
"time": datetime.datetime.fromtimestamp(
|
|
114
|
+
stat.st_atime, tz=datetime.timezone.utc
|
|
115
|
+
),
|
|
116
|
+
"mtime": datetime.datetime.fromtimestamp(
|
|
117
|
+
stat.st_mtime, tz=datetime.timezone.utc
|
|
118
|
+
),
|
|
115
119
|
}
|
|
116
120
|
if parent_path:
|
|
117
121
|
out["name"] = "/".join([parent_path.rstrip("/"), stat.filename])
|
fsspec/implementations/smb.py
CHANGED
|
@@ -81,7 +81,7 @@ class SMBFileSystem(AbstractFileSystem):
|
|
|
81
81
|
----------
|
|
82
82
|
host: str
|
|
83
83
|
The remote server name/ip to connect to
|
|
84
|
-
port: int
|
|
84
|
+
port: int or None
|
|
85
85
|
Port to connect with. Usually 445, sometimes 139.
|
|
86
86
|
username: str or None
|
|
87
87
|
Username to connect with. Required if Kerberos auth is not being used.
|
|
@@ -114,12 +114,16 @@ class SMBFileSystem(AbstractFileSystem):
|
|
|
114
114
|
self.share_access = share_access
|
|
115
115
|
self._connect()
|
|
116
116
|
|
|
117
|
+
@property
|
|
118
|
+
def _port(self):
|
|
119
|
+
return 445 if self.port is None else self.port
|
|
120
|
+
|
|
117
121
|
def _connect(self):
|
|
118
122
|
smbclient.register_session(
|
|
119
123
|
self.host,
|
|
120
124
|
username=self.username,
|
|
121
125
|
password=self.password,
|
|
122
|
-
port=
|
|
126
|
+
port=self._port,
|
|
123
127
|
encrypt=self.encrypt,
|
|
124
128
|
connection_timeout=self.timeout,
|
|
125
129
|
)
|
|
@@ -139,23 +143,23 @@ class SMBFileSystem(AbstractFileSystem):
|
|
|
139
143
|
def mkdir(self, path, create_parents=True, **kwargs):
|
|
140
144
|
wpath = _as_unc_path(self.host, path)
|
|
141
145
|
if create_parents:
|
|
142
|
-
smbclient.makedirs(wpath, exist_ok=False, **kwargs)
|
|
146
|
+
smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
|
|
143
147
|
else:
|
|
144
|
-
smbclient.mkdir(wpath, **kwargs)
|
|
148
|
+
smbclient.mkdir(wpath, port=self._port, **kwargs)
|
|
145
149
|
|
|
146
150
|
def makedirs(self, path, exist_ok=False):
|
|
147
151
|
if _share_has_path(path):
|
|
148
152
|
wpath = _as_unc_path(self.host, path)
|
|
149
|
-
smbclient.makedirs(wpath, exist_ok=exist_ok)
|
|
153
|
+
smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)
|
|
150
154
|
|
|
151
155
|
def rmdir(self, path):
|
|
152
156
|
if _share_has_path(path):
|
|
153
157
|
wpath = _as_unc_path(self.host, path)
|
|
154
|
-
smbclient.rmdir(wpath)
|
|
158
|
+
smbclient.rmdir(wpath, port=self._port)
|
|
155
159
|
|
|
156
160
|
def info(self, path, **kwargs):
|
|
157
161
|
wpath = _as_unc_path(self.host, path)
|
|
158
|
-
stats = smbclient.stat(wpath, **kwargs)
|
|
162
|
+
stats = smbclient.stat(wpath, port=self._port, **kwargs)
|
|
159
163
|
if S_ISDIR(stats.st_mode):
|
|
160
164
|
stype = "directory"
|
|
161
165
|
elif S_ISLNK(stats.st_mode):
|
|
@@ -176,18 +180,18 @@ class SMBFileSystem(AbstractFileSystem):
|
|
|
176
180
|
def created(self, path):
|
|
177
181
|
"""Return the created timestamp of a file as a datetime.datetime"""
|
|
178
182
|
wpath = _as_unc_path(self.host, path)
|
|
179
|
-
stats = smbclient.stat(wpath)
|
|
180
|
-
return datetime.datetime.
|
|
183
|
+
stats = smbclient.stat(wpath, port=self._port)
|
|
184
|
+
return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)
|
|
181
185
|
|
|
182
186
|
def modified(self, path):
|
|
183
187
|
"""Return the modified timestamp of a file as a datetime.datetime"""
|
|
184
188
|
wpath = _as_unc_path(self.host, path)
|
|
185
|
-
stats = smbclient.stat(wpath)
|
|
186
|
-
return datetime.datetime.
|
|
189
|
+
stats = smbclient.stat(wpath, port=self._port)
|
|
190
|
+
return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)
|
|
187
191
|
|
|
188
192
|
def ls(self, path, detail=True, **kwargs):
|
|
189
193
|
unc = _as_unc_path(self.host, path)
|
|
190
|
-
listed = smbclient.listdir(unc, **kwargs)
|
|
194
|
+
listed = smbclient.listdir(unc, port=self._port, **kwargs)
|
|
191
195
|
dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
|
|
192
196
|
if detail:
|
|
193
197
|
dirs = [self.info(d) for d in dirs]
|
|
@@ -217,30 +221,37 @@ class SMBFileSystem(AbstractFileSystem):
|
|
|
217
221
|
share_access = kwargs.pop("share_access", self.share_access)
|
|
218
222
|
if "w" in mode and autocommit is False:
|
|
219
223
|
temp = _as_temp_path(self.host, path, self.temppath)
|
|
220
|
-
return SMBFileOpener(
|
|
224
|
+
return SMBFileOpener(
|
|
225
|
+
wpath, temp, mode, port=self._port, block_size=bls, **kwargs
|
|
226
|
+
)
|
|
221
227
|
return smbclient.open_file(
|
|
222
|
-
wpath,
|
|
228
|
+
wpath,
|
|
229
|
+
mode,
|
|
230
|
+
buffering=bls,
|
|
231
|
+
share_access=share_access,
|
|
232
|
+
port=self._port,
|
|
233
|
+
**kwargs,
|
|
223
234
|
)
|
|
224
235
|
|
|
225
236
|
def copy(self, path1, path2, **kwargs):
|
|
226
237
|
"""Copy within two locations in the same filesystem"""
|
|
227
238
|
wpath1 = _as_unc_path(self.host, path1)
|
|
228
239
|
wpath2 = _as_unc_path(self.host, path2)
|
|
229
|
-
smbclient.copyfile(wpath1, wpath2, **kwargs)
|
|
240
|
+
smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)
|
|
230
241
|
|
|
231
242
|
def _rm(self, path):
|
|
232
243
|
if _share_has_path(path):
|
|
233
244
|
wpath = _as_unc_path(self.host, path)
|
|
234
|
-
stats = smbclient.stat(wpath)
|
|
245
|
+
stats = smbclient.stat(wpath, port=self._port)
|
|
235
246
|
if S_ISDIR(stats.st_mode):
|
|
236
|
-
smbclient.rmdir(wpath)
|
|
247
|
+
smbclient.rmdir(wpath, port=self._port)
|
|
237
248
|
else:
|
|
238
|
-
smbclient.remove(wpath)
|
|
249
|
+
smbclient.remove(wpath, port=self._port)
|
|
239
250
|
|
|
240
251
|
def mv(self, path1, path2, **kwargs):
|
|
241
252
|
wpath1 = _as_unc_path(self.host, path1)
|
|
242
253
|
wpath2 = _as_unc_path(self.host, path2)
|
|
243
|
-
smbclient.rename(wpath1, wpath2, **kwargs)
|
|
254
|
+
smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
|
|
244
255
|
|
|
245
256
|
|
|
246
257
|
def _as_unc_path(host, path):
|
|
@@ -266,7 +277,7 @@ def _share_has_path(path):
|
|
|
266
277
|
class SMBFileOpener(object):
|
|
267
278
|
"""writes to remote temporary file, move on commit"""
|
|
268
279
|
|
|
269
|
-
def __init__(self, path, temp, mode, block_size=-1, **kwargs):
|
|
280
|
+
def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
|
|
270
281
|
self.path = path
|
|
271
282
|
self.temp = temp
|
|
272
283
|
self.mode = mode
|
|
@@ -274,22 +285,27 @@ class SMBFileOpener(object):
|
|
|
274
285
|
self.kwargs = kwargs
|
|
275
286
|
self.smbfile = None
|
|
276
287
|
self._incontext = False
|
|
288
|
+
self.port = port
|
|
277
289
|
self._open()
|
|
278
290
|
|
|
279
291
|
def _open(self):
|
|
280
292
|
if self.smbfile is None or self.smbfile.closed:
|
|
281
293
|
self.smbfile = smbclient.open_file(
|
|
282
|
-
self.temp,
|
|
294
|
+
self.temp,
|
|
295
|
+
self.mode,
|
|
296
|
+
port=self.port,
|
|
297
|
+
buffering=self.block_size,
|
|
298
|
+
**self.kwargs,
|
|
283
299
|
)
|
|
284
300
|
|
|
285
301
|
def commit(self):
|
|
286
302
|
"""Move temp file to definitive on success."""
|
|
287
303
|
# TODO: use transaction support in SMB protocol
|
|
288
|
-
smbclient.replace(self.temp, self.path)
|
|
304
|
+
smbclient.replace(self.temp, self.path, port=self.port)
|
|
289
305
|
|
|
290
306
|
def discard(self):
|
|
291
307
|
"""Remove the temp file on failure."""
|
|
292
|
-
smbclient.remove(self.temp)
|
|
308
|
+
smbclient.remove(self.temp, port=self.port)
|
|
293
309
|
|
|
294
310
|
def __fspath__(self):
|
|
295
311
|
return self.path
|
fsspec/mapping.py
CHANGED
|
@@ -2,6 +2,7 @@ import array
|
|
|
2
2
|
import posixpath
|
|
3
3
|
import warnings
|
|
4
4
|
from collections.abc import MutableMapping
|
|
5
|
+
from functools import cached_property
|
|
5
6
|
|
|
6
7
|
from .core import url_to_fs
|
|
7
8
|
|
|
@@ -59,6 +60,13 @@ class FSMap(MutableMapping):
|
|
|
59
60
|
self.fs.touch(root + "/a")
|
|
60
61
|
self.fs.rm(root + "/a")
|
|
61
62
|
|
|
63
|
+
@cached_property
|
|
64
|
+
def dirfs(self):
|
|
65
|
+
"""dirfs instance that can be used with the same keys as the mapper"""
|
|
66
|
+
from .implementations.dirfs import DirFileSystem
|
|
67
|
+
|
|
68
|
+
return DirFileSystem(path=self._root_key_to_str, fs=self.fs)
|
|
69
|
+
|
|
62
70
|
def clear(self):
|
|
63
71
|
"""Remove all keys below root - empties out mapping"""
|
|
64
72
|
try:
|
fsspec/registry.py
CHANGED
|
@@ -119,6 +119,10 @@ known_implementations = {
|
|
|
119
119
|
"class": "ocifs.OCIFileSystem",
|
|
120
120
|
"err": "Install ocifs to access OCI Object Storage",
|
|
121
121
|
},
|
|
122
|
+
"ocilake": {
|
|
123
|
+
"class": "ocifs.OCIFileSystem",
|
|
124
|
+
"err": "Install ocifs to access OCI Data Lake",
|
|
125
|
+
},
|
|
122
126
|
"asynclocal": {
|
|
123
127
|
"class": "morefs.asyn_local.AsyncLocalFileSystem",
|
|
124
128
|
"err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
|
|
@@ -200,6 +204,10 @@ known_implementations = {
|
|
|
200
204
|
"class": "boxfs.BoxFileSystem",
|
|
201
205
|
"err": "Please install boxfs to access BoxFileSystem",
|
|
202
206
|
},
|
|
207
|
+
"lakefs": {
|
|
208
|
+
"class": "lakefs_spec.LakeFSFileSystem",
|
|
209
|
+
"err": "Please install lakefs-spec to access LakeFSFileSystem",
|
|
210
|
+
},
|
|
203
211
|
}
|
|
204
212
|
|
|
205
213
|
|
|
@@ -233,6 +241,14 @@ def get_filesystem_class(protocol):
|
|
|
233
241
|
return cls
|
|
234
242
|
|
|
235
243
|
|
|
244
|
+
s3_msg = """Your installed version of s3fs is very old and known to cause
|
|
245
|
+
severe performance issues, see also https://github.com/dask/dask/issues/10276
|
|
246
|
+
|
|
247
|
+
To fix, you should specify a lower version bound on s3fs, or
|
|
248
|
+
update the current installation.
|
|
249
|
+
"""
|
|
250
|
+
|
|
251
|
+
|
|
236
252
|
def _import_class(cls, minv=None):
|
|
237
253
|
"""Take a string FQP and return the imported class or identifier
|
|
238
254
|
|
|
@@ -240,13 +256,19 @@ def _import_class(cls, minv=None):
|
|
|
240
256
|
"""
|
|
241
257
|
if ":" in cls:
|
|
242
258
|
mod, name = cls.rsplit(":", 1)
|
|
259
|
+
s3 = mod == "s3fs"
|
|
243
260
|
mod = importlib.import_module(mod)
|
|
261
|
+
if s3 and mod.__version__.split(".") < ["0", "5"]:
|
|
262
|
+
warnings.warn(s3_msg)
|
|
244
263
|
for part in name.split("."):
|
|
245
264
|
mod = getattr(mod, part)
|
|
246
265
|
return mod
|
|
247
266
|
else:
|
|
248
267
|
mod, name = cls.rsplit(".", 1)
|
|
268
|
+
s3 = mod == "s3fs"
|
|
249
269
|
mod = importlib.import_module(mod)
|
|
270
|
+
if s3 and mod.__version__.split(".") < ["0", "5"]:
|
|
271
|
+
warnings.warn(s3_msg)
|
|
250
272
|
return getattr(mod, name)
|
|
251
273
|
|
|
252
274
|
|