fsspec 2024.6.0.tar.gz → 2024.9.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fsspec-2024.6.0 → fsspec-2024.9.0}/.github/workflows/main.yaml +6 -13
- {fsspec-2024.6.0 → fsspec-2024.9.0}/PKG-INFO +1 -2
- {fsspec-2024.6.0 → fsspec-2024.9.0}/README.md +0 -1
- {fsspec-2024.6.0 → fsspec-2024.9.0}/ci/environment-downstream.yml +1 -1
- {fsspec-2024.6.0 → fsspec-2024.9.0}/ci/environment-friends.yml +1 -1
- {fsspec-2024.6.0 → fsspec-2024.9.0}/ci/environment-py38.yml +1 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/ci/environment-win.yml +3 -1
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/api.rst +2 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/changelog.rst +36 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/_version.py +2 -2
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/asyn.py +1 -1
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/core.py +1 -1
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/fuse.py +4 -4
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/generic.py +3 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/gui.py +4 -2
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/arrow.py +1 -1
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/cached.py +5 -3
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/dbfs.py +21 -21
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/dirfs.py +9 -1
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/ftp.py +23 -13
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/http.py +5 -4
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/local.py +11 -7
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/memory.py +6 -6
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/reference.py +50 -43
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/smb.py +78 -5
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/webhdfs.py +2 -2
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/zip.py +42 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/json.py +42 -2
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/mapping.py +4 -4
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/spec.py +51 -7
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/tests/abstract/__init__.py +3 -3
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/utils.py +1 -4
- {fsspec-2024.6.0 → fsspec-2024.9.0}/install_s3fs.sh +1 -1
- {fsspec-2024.6.0 → fsspec-2024.9.0}/pyproject.toml +6 -4
- {fsspec-2024.6.0 → fsspec-2024.9.0}/.codespellrc +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/.coveragerc +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/.gitattributes +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/.github/workflows/codespell.yml +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/.github/workflows/pypipublish.yaml +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/.gitignore +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/.pre-commit-config.yaml +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/LICENSE +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/ci/environment-typecheck.yml +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/Makefile +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/README.md +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/environment.yml +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/make.bat +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/_static/custom.css +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/async.rst +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/conf.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/copying.rst +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/developer.rst +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/features.rst +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/img/gui.png +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/index.rst +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/intro.rst +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/usage.rst +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/__init__.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/archive.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/caching.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/callbacks.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/compression.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/config.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/conftest.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/dircache.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/exceptions.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/__init__.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/cache_mapper.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/cache_metadata.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/dask.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/data.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/git.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/github.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/jupyter.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/libarchive.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/sftp.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/tar.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/parquet.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/registry.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/tests/abstract/common.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/tests/abstract/copy.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/tests/abstract/get.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/tests/abstract/mv.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/tests/abstract/put.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/transaction.py +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/readthedocs.yml +0 -0
- {fsspec-2024.6.0 → fsspec-2024.9.0}/setup.cfg +0 -0
{fsspec-2024.6.0 → fsspec-2024.9.0}/.github/workflows/main.yaml

@@ -25,19 +25,15 @@ jobs:
           fetch-depth: 0

       - name: Setup conda
-        uses:
+        uses: conda-incubator/setup-miniconda@v3
         with:
           environment-file: ci/environment-py38.yml
-
-          python=${{ matrix.PY }}
+          python-version: ${{ matrix.PY }}

       - name: Run Tests
         shell: bash -l {0}
         run: |
-          pip install s3fs
-          pip uninstall s3fs
           pip install -e .[test_full]
-          pip install s3fs --no-deps
           pytest -v

   win:
@@ -54,17 +50,14 @@ jobs:
           fetch-depth: 0

       - name: Setup conda
-        uses:
+        uses: conda-incubator/setup-miniconda@v3
         with:
           environment-file: ci/environment-win.yml

       - name: Run Tests
         shell: bash -l {0}
         run: |
-          pip install s3fs
-          pip uninstall s3fs
           pip install -e .[test]
-          pip install s3fs --no-deps
           pytest -v

   lint:
@@ -84,7 +77,7 @@ jobs:
 #        uses: actions/checkout@v4
 #
 #      - name: Setup conda
-#        uses:
+#        uses: conda-incubator/setup-miniconda@v3
 #        with:
 #          environment-file: ci/environment-typecheck.yml
 #
@@ -104,7 +97,7 @@ jobs:
           fetch-depth: 0

       - name: Setup conda
-        uses:
+        uses: conda-incubator/setup-miniconda@v3
         with:
           environment-file: ci/environment-downstream.yml

@@ -145,7 +138,7 @@ jobs:
         uses: actions/checkout@v4

       - name: Setup conda
-        uses:
+        uses: conda-incubator/setup-miniconda@v3
         with:
           environment-file: ci/environment-friends.yml

{fsspec-2024.6.0 → fsspec-2024.9.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: fsspec
-Version: 2024.6.0
+Version: 2024.9.0
 Summary: File-system specification
 Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
 Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
@@ -184,7 +184,6 @@ Description-Content-Type: text/markdown
 [](https://anaconda.org/conda-forge/fsspec)
 
 [](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
-[](https://piptrends.com/package/fsspec)

 A specification for pythonic filesystems.

{fsspec-2024.6.0 → fsspec-2024.9.0}/README.md

@@ -4,7 +4,6 @@
 [](https://anaconda.org/conda-forge/fsspec)
 
 [](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
-[](https://piptrends.com/package/fsspec)

 A specification for pythonic filesystems.

{fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/api.rst

@@ -226,6 +226,7 @@ Other Known Implementations
 - `s3fs`_ for Amazon S3 and other compatible stores
 - `wandbfs`_ to access Wandb run data (experimental)
 - `webdav4`_ for WebDAV
+- `xrootd`_ for xrootd, with protocol "root://"

 .. _abfs: https://github.com/dask/adlfs
 .. _adl: https://github.com/dask/adlfs
@@ -244,6 +245,7 @@ Other Known Implementations
 .. _s3fs: https://s3fs.readthedocs.io/en/latest/
 .. _wandbfs: https://github.com/jkulhanek/wandbfs
 .. _webdav4: https://github.com/skshetry/webdav4
+.. _xrootd: https://github.com/CoffeaTeam/fsspec-xrootd

 .. _readbuffering:

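The new entry documents that the "root://" protocol is provided by the separate fsspec-xrootd package, which plugs into fsspec's protocol registry like any other third-party implementation. A minimal sketch, assuming fsspec-xrootd is installed and using a purely hypothetical server URL:

```python
import fsspec

# Hypothetical XRootD endpoint and path; the "root" protocol is resolved to
# fsspec-xrootd's filesystem class through fsspec's registry.
with fsspec.open("root://xrootd.example.org//store/data/file.root", "rb") as f:
    header = f.read(16)
print(header)
```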
{fsspec-2024.6.0 → fsspec-2024.9.0}/docs/source/changelog.rst

@@ -1,6 +1,42 @@
 Changelog
 =========

+2024.9.0
+--------
+
+Enhancements
+
+- fewer stat calls in localFS (#1659)
+- faster find in ZIP (#1664)
+
+Fixes
+
+- paths without "/" in dirFS (#1638)
+- paths with "/" in FTS (#1643, 1644)
+- ls in parquet-based nested reference sets, and append (#1645, 1657)
+- exception handling for SMB (#1650)
+
+
+Other
+
+- style (#1640, 1641, 1660)
+- docs: xrootd (#1646)
+- CI back on miniconda (#1658)
+
+2024.6.1
+--------
+
+Fixes
+
+- fix appending to non-dict reference sets (#1634)
+- don't let generic edit info dicts (#1633)
+- set https's loop before calling super (#1633)
+- cached write file doesn't need to update it's size on close (#1633)
+- fix JSON serialize for FSs with interior FSs (#1628, 1627)
+- option to strip "password" when pickling (#1625)
+- fix filecache write (#1622)
+
+
 2024.6.0
 --------

{fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/_version.py

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE

-__version__ = version = '2024.6.0'
-__version_tuple__ = version_tuple = (2024, 6, 0)
+__version__ = version = '2024.9.0'
+__version_tuple__ = version_tuple = (2024, 9, 0)
{fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/core.py

@@ -639,7 +639,7 @@ def get_fs_token_paths(
     if isinstance(urlpath, (list, tuple, set)):
         if not urlpath:
             raise ValueError("empty urlpath sequence")
-        urlpath0 = stringify_path(
+        urlpath0 = stringify_path(next(iter(urlpath)))
     else:
         urlpath0 = stringify_path(urlpath)
     storage_options = storage_options or {}
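The new line picks the first member with `next(iter(urlpath))`, so any of the accepted list, tuple, or set inputs is treated the same without relying on indexing. A small sketch with hypothetical local paths:

```python
from fsspec.core import get_fs_token_paths

# Hypothetical paths; a set works because the first element is taken with
# next(iter(...)), which does not require the container to support indexing.
fs, token, paths = get_fs_token_paths({"/tmp/a.csv", "/tmp/b.csv"})
print(fs.protocol, sorted(paths))
```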
{fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/fuse.py

@@ -31,8 +31,8 @@ class FUSEr(Operations):
         path = "".join([self.root, path.lstrip("/")]).rstrip("/")
         try:
             info = self.fs.info(path)
-        except FileNotFoundError:
-            raise FuseOSError(ENOENT)
+        except FileNotFoundError as exc:
+            raise FuseOSError(ENOENT) from exc

         data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)}
         perm = info.get("mode", 0o777)
@@ -119,8 +119,8 @@ class FUSEr(Operations):
         fn = "".join([self.root, path.lstrip("/")])
         try:
             self.fs.rm(fn, False)
-        except (OSError, FileNotFoundError):
-            raise FuseOSError(EIO)
+        except (OSError, FileNotFoundError) as exc:
+            raise FuseOSError(EIO) from exc

     def release(self, path, fh):
         try:
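These hunks, like the similar ones in gui.py and dbfs.py below, switch to explicit exception chaining with `raise ... from exc`, so the original error stays attached as `__cause__`. A minimal illustration of the pattern with a hypothetical helper name, using the built-in memory filesystem:

```python
from fsspec.implementations.memory import MemoryFileSystem


def info_or_error(fs, path):
    # Hypothetical helper: chaining keeps the original FileNotFoundError
    # attached as __cause__ instead of silently discarding it.
    try:
        return fs.info(path)
    except FileNotFoundError as exc:
        raise RuntimeError(f"cannot stat {path!r}") from exc


fs = MemoryFileSystem()
try:
    info_or_error(fs, "/missing")
except RuntimeError as err:
    print(type(err.__cause__).__name__)  # FileNotFoundError
```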
{fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/generic.py

@@ -197,6 +197,7 @@ class GenericFileSystem(AsyncFileSystem):
         )
         result = {}
         for k, v in out.items():
+            v = v.copy()  # don't corrupt target FS dircache
             name = fs.unstrip_protocol(k)
             v["name"] = name
             result[name] = v
@@ -210,6 +211,7 @@ class GenericFileSystem(AsyncFileSystem):
             out = await fs._info(url, **kwargs)
         else:
             out = fs.info(url, **kwargs)
+        out = out.copy()  # don't edit originals
         out["name"] = fs.unstrip_protocol(out["name"])
         return out

@@ -224,6 +226,7 @@ class GenericFileSystem(AsyncFileSystem):
             out = await fs._ls(url, detail=True, **kwargs)
         else:
             out = fs.ls(url, detail=True, **kwargs)
+        out = [o.copy() for o in out]  # don't edit originals
         for o in out:
             o["name"] = fs.unstrip_protocol(o["name"])
         if detail:
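The added `.copy()` calls are the "don't let generic edit info dicts" fix: `GenericFileSystem` relabels entries with protocol-prefixed names, and those edits must not leak back into the dictionaries held by the wrapped filesystem. A rough sketch against the in-memory filesystem, with illustrative paths:

```python
import fsspec
from fsspec.generic import GenericFileSystem

mem = fsspec.filesystem("memory")
mem.pipe("/data/a.bin", b"hello")

gfs = GenericFileSystem()
entry = gfs.info("memory://data/a.bin")
entry["extra"] = "scratch"  # mutates only the copy returned above

# The wrapped filesystem's own view of the entry is untouched.
print("extra" in mem.info("/data/a.bin"))  # False
```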
{fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/gui.py

@@ -93,8 +93,10 @@ class SigSlot:
         """Display in a notebook or a server"""
         try:
             return self.panel._repr_mimebundle_(*args, **kwargs)
-        except (ValueError, AttributeError):
-            raise NotImplementedError(
+        except (ValueError, AttributeError) as exc:
+            raise NotImplementedError(
+                "Panel does not seem to be set up properly"
+            ) from exc

     def connect(self, signal, slot):
         """Associate call back with given event
{fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/arrow.py

@@ -128,7 +128,7 @@ class ArrowFSWrapper(AbstractFileSystem):
             with self.open(tmp_fname, "wb") as rstream:
                 shutil.copyfileobj(lstream, rstream)
             self.fs.move(tmp_fname, path2)
-        except BaseException:
+        except BaseException:
             with suppress(FileNotFoundError):
                 self.fs.delete_file(tmp_fname)
             raise
{fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/cached.py

@@ -651,7 +651,8 @@ class WholeFileCacheFileSystem(CachingFileSystem):
     def _open(self, path, mode="rb", **kwargs):
         path = self._strip_protocol(path)
         if "r" not in mode:
-
+            hash = self._mapper(path)
+            fn = os.path.join(self.storage[-1], hash)
             user_specified_kwargs = {
                 k: v
                 for k, v in kwargs.items()
@@ -794,7 +795,8 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
         if self._intrans:
             f = [_ for _ in self.transaction.files if _.path == path]
             if f:
-
+                size = os.path.getsize(f[0].fn) if f[0].closed else f[0].tell()
+                return {"name": path, "size": size, "type": "file"}
             f = any(_.path.startswith(path + "/") for _ in self.transaction.files)
             if f:
                 return {"name": path, "size": 0, "type": "directory"}
@@ -900,7 +902,7 @@ class LocalTempFile:
             self.close()

     def close(self):
-        self.size = self.fh.tell()
+        # self.size = self.fh.tell()
         if self.closed:
             return
         self.fh.close()
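Taken together, these hunks cover the "fix filecache write" and "cached write file doesn't need to update it's size on close" items: the write path now derives the local cache file name from the cache mapper, and the temp file's size is read where it is needed rather than recorded at close time. A hedged round-trip sketch through `filecache` with an in-memory target (cache location and target name are arbitrary):

```python
import tempfile

import fsspec

cache_dir = tempfile.mkdtemp()
url = "filecache::memory://results/out.bin"  # hypothetical chained target
opts = {"filecache": {"cache_storage": cache_dir}}

# Writing goes through a local temp file that is uploaded when closed.
with fsspec.open(url, "wb", **opts) as f:
    f.write(b"payload")

# Reading back pulls the whole file into the local cache directory.
with fsspec.open(url, "rb", **opts) as f:
    print(f.read())  # b"payload"
```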
{fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/dbfs.py

@@ -77,9 +77,9 @@ class DatabricksFileSystem(AbstractFileSystem):
                 )
             except DatabricksException as e:
                 if e.error_code == "RESOURCE_DOES_NOT_EXIST":
-                    raise FileNotFoundError(e.message)
+                    raise FileNotFoundError(e.message) from e

-                raise
+                raise
             files = r["files"]
             out = [
                 {
@@ -123,9 +123,9 @@ class DatabricksFileSystem(AbstractFileSystem):
             self._send_to_api(method="post", endpoint="mkdirs", json={"path": path})
         except DatabricksException as e:
             if e.error_code == "RESOURCE_ALREADY_EXISTS":
-                raise FileExistsError(e.message)
+                raise FileExistsError(e.message) from e

-            raise
+            raise
         self.invalidate_cache(self._parent(path))

     def mkdir(self, path, create_parents=True, **kwargs):
@@ -169,9 +169,9 @@ class DatabricksFileSystem(AbstractFileSystem):
                 self.rm(path=path, recursive=recursive)
             elif e.error_code == "IO_ERROR":
                 # Using the same exception as the os module would use here
-                raise OSError(e.message)
+                raise OSError(e.message) from e

-            raise
+            raise
         self.invalidate_cache(self._parent(path))

     def mv(
@@ -212,11 +212,11 @@ class DatabricksFileSystem(AbstractFileSystem):
             )
         except DatabricksException as e:
             if e.error_code == "RESOURCE_DOES_NOT_EXIST":
-                raise FileNotFoundError(e.message)
+                raise FileNotFoundError(e.message) from e
             elif e.error_code == "RESOURCE_ALREADY_EXISTS":
-                raise FileExistsError(e.message)
+                raise FileExistsError(e.message) from e

-            raise
+            raise
         self.invalidate_cache(self._parent(source_path))
         self.invalidate_cache(self._parent(destination_path))

@@ -264,9 +264,9 @@ class DatabricksFileSystem(AbstractFileSystem):
             try:
                 exception_json = e.response.json()
             except Exception:
-                raise e
+                raise e from None

-            raise DatabricksException(**exception_json)
+            raise DatabricksException(**exception_json) from e

         return r.json()

@@ -297,9 +297,9 @@ class DatabricksFileSystem(AbstractFileSystem):
             return r["handle"]
         except DatabricksException as e:
             if e.error_code == "RESOURCE_ALREADY_EXISTS":
-                raise FileExistsError(e.message)
+                raise FileExistsError(e.message) from e

-            raise
+            raise

     def _close_handle(self, handle):
         """
@@ -314,9 +314,9 @@ class DatabricksFileSystem(AbstractFileSystem):
             self._send_to_api(method="post", endpoint="close", json={"handle": handle})
         except DatabricksException as e:
             if e.error_code == "RESOURCE_DOES_NOT_EXIST":
-                raise FileNotFoundError(e.message)
+                raise FileNotFoundError(e.message) from e

-            raise
+            raise

     def _add_data(self, handle, data):
         """
@@ -342,11 +342,11 @@ class DatabricksFileSystem(AbstractFileSystem):
             )
         except DatabricksException as e:
             if e.error_code == "RESOURCE_DOES_NOT_EXIST":
-                raise FileNotFoundError(e.message)
+                raise FileNotFoundError(e.message) from e
             elif e.error_code == "MAX_BLOCK_SIZE_EXCEEDED":
-                raise ValueError(e.message)
+                raise ValueError(e.message) from e

-            raise
+            raise

     def _get_data(self, path, start, end):
         """
@@ -372,11 +372,11 @@ class DatabricksFileSystem(AbstractFileSystem):
             return base64.b64decode(r["data"])
         except DatabricksException as e:
             if e.error_code == "RESOURCE_DOES_NOT_EXIST":
-                raise FileNotFoundError(e.message)
+                raise FileNotFoundError(e.message) from e
             elif e.error_code in ["INVALID_PARAMETER_VALUE", "MAX_READ_SIZE_EXCEEDED"]:
-                raise ValueError(e.message)
+                raise ValueError(e.message) from e

-            raise
+            raise

     def invalidate_cache(self, path=None):
         if path is None:
{fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/dirfs.py

@@ -56,15 +56,23 @@ class DirFileSystem(AsyncFileSystem):
             if not path:
                 return self.path
             return self.fs.sep.join((self.path, self._strip_protocol(path)))
+        if isinstance(path, dict):
+            return {self._join(_path): value for _path, value in path.items()}
         return [self._join(_path) for _path in path]

     def _relpath(self, path):
         if isinstance(path, str):
             if not self.path:
                 return path
-
+            # We need to account for S3FileSystem returning paths that do not
+            # start with a '/'
+            if path == self.path or (
+                self.path.startswith(self.fs.sep) and path == self.path[1:]
+            ):
                 return ""
             prefix = self.path + self.fs.sep
+            if self.path.startswith(self.fs.sep) and not path.startswith(self.fs.sep):
+                prefix = prefix[1:]
             assert path.startswith(prefix)
             return path[len(prefix) :]
         return [self._relpath(_path) for _path in path]
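The `_relpath` changes make `DirFileSystem` tolerate backends that report paths without the leading separator (the S3 case named in the comment), and `_join` now maps dict arguments onto the prefix as well. A small sketch against the in-memory filesystem; the prefix and file names are arbitrary:

```python
import fsspec

mem = fsspec.filesystem("memory")
mem.makedirs("/bucket/prefix", exist_ok=True)

# DirFileSystem is registered under the "dir" protocol.
dfs = fsspec.filesystem("dir", path="/bucket/prefix", fs=mem)

# dict form works because _join rewrites every key onto the prefix
dfs.pipe({"a.csv": b"1,2,3", "sub/b.csv": b"4,5,6"})

print(dfs.ls("", detail=False))                 # names relative to the prefix
print(mem.ls("/bucket/prefix", detail=False))   # full names on the target fs
```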
{fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/ftp.py

@@ -2,7 +2,7 @@ import os
 import sys
 import uuid
 import warnings
-from ftplib import FTP, Error, error_perm
+from ftplib import FTP, FTP_TLS, Error, error_perm
 from typing import Any

 from ..spec import AbstractBufferedFile, AbstractFileSystem
@@ -27,6 +27,7 @@ class FTPFileSystem(AbstractFileSystem):
         tempdir=None,
         timeout=30,
         encoding="utf-8",
+        tls=False,
         **kwargs,
     ):
         """
@@ -56,28 +57,37 @@ class FTPFileSystem(AbstractFileSystem):
             Timeout of the ftp connection in seconds
         encoding: str
             Encoding to use for directories and filenames in FTP connection
+        tls: bool
+            Use FTP-TLS, by default False
         """
         super().__init__(**kwargs)
         self.host = host
         self.port = port
         self.tempdir = tempdir or "/tmp"
-        self.cred = username, password, acct
+        self.cred = username or "", password or "", acct or ""
         self.timeout = timeout
         self.encoding = encoding
         if block_size is not None:
             self.blocksize = block_size
         else:
             self.blocksize = 2**16
+        self.tls = tls
         self._connect()
+        if self.tls:
+            self.ftp.prot_p()

     def _connect(self):
+        if self.tls:
+            ftp_cls = FTP_TLS
+        else:
+            ftp_cls = FTP
         if sys.version_info >= (3, 9):
-            self.ftp =
+            self.ftp = ftp_cls(timeout=self.timeout, encoding=self.encoding)
         elif self.encoding:
             warnings.warn("`encoding` not supported for python<3.9, ignoring")
-            self.ftp =
+            self.ftp = ftp_cls(timeout=self.timeout)
         else:
-            self.ftp =
+            self.ftp = ftp_cls(timeout=self.timeout)
         self.ftp.connect(self.host, self.port)
         self.ftp.login(*self.cred)

@@ -107,9 +117,9 @@ class FTPFileSystem(AbstractFileSystem):
         except error_perm:
             out = _mlsd2(self.ftp, path)  # Not platform independent
         for fn, details in out:
-
-            path
-
+            details["name"] = "/".join(
+                ["" if path == "/" else path, fn.lstrip("/")]
+            )
             if details["type"] == "file":
                 details["size"] = int(details["size"])
             else:
@@ -122,8 +132,8 @@ class FTPFileSystem(AbstractFileSystem):
                 info = self.info(path)
                 if info["type"] == "file":
                     out = [(path, info)]
-            except (Error, IndexError):
-                raise FileNotFoundError(path)
+            except (Error, IndexError) as exc:
+                raise FileNotFoundError(path) from exc
         files = self.dircache.get(path, out)
         if not detail:
             return sorted([fn for fn, details in files])
@@ -137,9 +147,9 @@ class FTPFileSystem(AbstractFileSystem):
             return {"name": "/", "size": 0, "type": "directory"}
         files = self.ls(self._parent(path).lstrip("/"), True)
         try:
-            out =
-        except
-            raise FileNotFoundError(path)
+            out = next(f for f in files if f["name"] == path)
+        except StopIteration as exc:
+            raise FileNotFoundError(path) from exc
         return out

     def get_file(self, rpath, lpath, **kwargs):
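The new `tls=True` option selects `ftplib.FTP_TLS` and calls `prot_p()` after login, so both the control and data channels are protected. A hedged usage sketch; the server details are hypothetical:

```python
import fsspec

# Hypothetical server and credentials; tls=True is the new switch added above.
fs = fsspec.filesystem(
    "ftp",
    host="ftp.example.com",
    username="user",
    password="secret",
    tls=True,
)
print(fs.ls("/", detail=False))
```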
{fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/http.py

@@ -254,7 +254,7 @@ class HTTPFileSystem(AsyncFileSystem):
         if isfilelike(lpath):
             outfile = lpath
         else:
-            outfile = open(lpath, "wb")  # noqa: ASYNC101
+            outfile = open(lpath, "wb")  # noqa: ASYNC101, ASYNC230

         try:
             chunk = True
@@ -282,7 +282,7 @@ class HTTPFileSystem(AsyncFileSystem):
             context = nullcontext(lpath)
             use_seek = False  # might not support seeking
         else:
-            context = open(lpath, "rb")  # noqa: ASYNC101
+            context = open(lpath, "rb")  # noqa: ASYNC101, ASYNC230
             use_seek = True

         with context as f:
@@ -560,6 +560,7 @@ class HTTPFile(AbstractBufferedFile):
         if mode != "rb":
             raise NotImplementedError("File mode not supported")
         self.asynchronous = asynchronous
+        self.loop = loop
         self.url = url
         self.session = session
         self.details = {"name": url, "size": size, "type": "file"}
@@ -572,7 +573,6 @@ class HTTPFile(AbstractBufferedFile):
             cache_options=cache_options,
             **kwargs,
         )
-        self.loop = loop

     def read(self, length=-1):
         """Read bytes from file
@@ -736,6 +736,7 @@ class HTTPStreamFile(AbstractBufferedFile):
             return r

         self.r = sync(self.loop, cor)
+        self.loop = fs.loop

     def seek(self, loc, whence=0):
         if loc == 0 and whence == 1:
@@ -804,7 +805,7 @@ async def get_range(session, url, start, end, file=None, **kwargs):
     async with r:
         out = await r.read()
         if file:
-            with open(file, "r+b") as f:  # noqa: ASYNC101
+            with open(file, "r+b") as f:  # noqa: ASYNC101, ASYNC230
                 f.seek(start)
                 f.write(out)
         else:
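The `HTTPFile` and `HTTPStreamFile` hunks move the `self.loop` assignment so the event loop is already attached when the rest of the file setup runs (the "set https's loop before calling super" item from 2024.6.1). Nothing changes on the user side; reading a remote file still looks like this sketch, with an illustrative URL and assuming aiohttp and network access:

```python
import fsspec

# Illustrative URL only; any readable HTTP(S) resource works the same way.
with fsspec.open("https://example.com/robots.txt", "rb") as f:
    print(f.read(64))
```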
{fsspec-2024.6.0 → fsspec-2024.9.0}/fsspec/implementations/local.py

@@ -79,6 +79,14 @@ class LocalFileSystem(AbstractFileSystem):
                 t = "file"
             else:
                 t = "other"
+
+            size = out.st_size
+            if link:
+                try:
+                    out2 = path.stat(follow_symlinks=True)
+                    size = out2.st_size
+                except OSError:
+                    size = 0
             path = self._strip_protocol(path.path)
         else:
             # str or path-like
@@ -87,6 +95,7 @@ class LocalFileSystem(AbstractFileSystem):
             link = stat.S_ISLNK(out.st_mode)
             if link:
                 out = os.stat(path, follow_symlinks=True)
+            size = out.st_size
             if stat.S_ISDIR(out.st_mode):
                 t = "directory"
             elif stat.S_ISREG(out.st_mode):
@@ -95,20 +104,15 @@ class LocalFileSystem(AbstractFileSystem):
                 t = "other"
         result = {
             "name": path,
-            "size":
+            "size": size,
             "type": t,
             "created": out.st_ctime,
             "islink": link,
         }
         for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
             result[field] = getattr(out, f"st_{field}")
-        if
+        if link:
             result["destination"] = os.readlink(path)
-            try:
-                out2 = os.stat(path, follow_symlinks=True)
-                result["size"] = out2.st_size
-            except OSError:
-                result["size"] = 0
         return result

     def lexists(self, path, **kwargs):
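This rework computes `size` once per branch instead of re-stating the path at the end of `info`, which is the "fewer stat calls in localFS" entry in the changelog. A small sketch of the resulting behaviour for symlinks; the scratch paths are arbitrary, and symlink creation may require extra privileges on Windows:

```python
import os
import tempfile

import fsspec

d = tempfile.mkdtemp()
target = os.path.join(d, "data.bin")
link = os.path.join(d, "data-link.bin")
with open(target, "wb") as f:
    f.write(b"x" * 1024)
os.symlink(target, link)

fs = fsspec.filesystem("file")
info = fs.info(link)
# Size of the target is reported (0 if the link were broken), and islink is set.
print(info["size"], info["islink"])  # 1024 True
```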