fsspec 2024.3.0__py3-none-any.whl → 2024.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/__init__.py +2 -3
- fsspec/_version.py +14 -19
- fsspec/caching.py +83 -14
- fsspec/compression.py +1 -0
- fsspec/core.py +31 -6
- fsspec/exceptions.py +1 -0
- fsspec/generic.py +1 -1
- fsspec/gui.py +1 -1
- fsspec/implementations/arrow.py +0 -2
- fsspec/implementations/cache_mapper.py +1 -2
- fsspec/implementations/cache_metadata.py +7 -7
- fsspec/implementations/dirfs.py +2 -2
- fsspec/implementations/http.py +9 -9
- fsspec/implementations/local.py +97 -48
- fsspec/implementations/memory.py +9 -0
- fsspec/implementations/smb.py +3 -1
- fsspec/implementations/tests/__init__.py +0 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +112 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +582 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +873 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +458 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +1355 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +795 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +613 -0
- fsspec/implementations/tests/conftest.py +39 -0
- fsspec/implementations/tests/local/__init__.py +0 -0
- fsspec/implementations/tests/local/local_fixtures.py +18 -0
- fsspec/implementations/tests/local/local_test.py +14 -0
- fsspec/implementations/tests/memory/__init__.py +0 -0
- fsspec/implementations/tests/memory/memory_fixtures.py +27 -0
- fsspec/implementations/tests/memory/memory_test.py +14 -0
- fsspec/implementations/tests/out.zip +0 -0
- fsspec/implementations/tests/test_archive.py +382 -0
- fsspec/implementations/tests/test_arrow.py +259 -0
- fsspec/implementations/tests/test_cached.py +1306 -0
- fsspec/implementations/tests/test_common.py +35 -0
- fsspec/implementations/tests/test_dask.py +29 -0
- fsspec/implementations/tests/test_data.py +20 -0
- fsspec/implementations/tests/test_dbfs.py +268 -0
- fsspec/implementations/tests/test_dirfs.py +588 -0
- fsspec/implementations/tests/test_ftp.py +178 -0
- fsspec/implementations/tests/test_git.py +76 -0
- fsspec/implementations/tests/test_http.py +577 -0
- fsspec/implementations/tests/test_jupyter.py +57 -0
- fsspec/implementations/tests/test_libarchive.py +33 -0
- fsspec/implementations/tests/test_local.py +1285 -0
- fsspec/implementations/tests/test_memory.py +382 -0
- fsspec/implementations/tests/test_reference.py +720 -0
- fsspec/implementations/tests/test_sftp.py +233 -0
- fsspec/implementations/tests/test_smb.py +139 -0
- fsspec/implementations/tests/test_tar.py +243 -0
- fsspec/implementations/tests/test_webhdfs.py +197 -0
- fsspec/implementations/tests/test_zip.py +134 -0
- fsspec/implementations/webhdfs.py +1 -3
- fsspec/mapping.py +2 -2
- fsspec/parquet.py +0 -8
- fsspec/registry.py +4 -0
- fsspec/spec.py +21 -4
- fsspec/tests/__init__.py +0 -0
- fsspec/tests/abstract/mv.py +57 -0
- fsspec/tests/conftest.py +188 -0
- fsspec/tests/data/listing.html +1 -0
- fsspec/tests/test_api.py +498 -0
- fsspec/tests/test_async.py +230 -0
- fsspec/tests/test_caches.py +255 -0
- fsspec/tests/test_callbacks.py +89 -0
- fsspec/tests/test_compression.py +164 -0
- fsspec/tests/test_config.py +129 -0
- fsspec/tests/test_core.py +466 -0
- fsspec/tests/test_downstream.py +40 -0
- fsspec/tests/test_file.py +200 -0
- fsspec/tests/test_fuse.py +147 -0
- fsspec/tests/test_generic.py +90 -0
- fsspec/tests/test_gui.py +23 -0
- fsspec/tests/test_mapping.py +228 -0
- fsspec/tests/test_parquet.py +140 -0
- fsspec/tests/test_registry.py +134 -0
- fsspec/tests/test_spec.py +1167 -0
- fsspec/tests/test_utils.py +478 -0
- fsspec/utils.py +0 -2
- fsspec-2024.5.0.dist-info/METADATA +273 -0
- fsspec-2024.5.0.dist-info/RECORD +111 -0
- {fsspec-2024.3.0.dist-info → fsspec-2024.5.0.dist-info}/WHEEL +1 -2
- fsspec-2024.3.0.dist-info/METADATA +0 -167
- fsspec-2024.3.0.dist-info/RECORD +0 -54
- fsspec-2024.3.0.dist-info/top_level.txt +0 -1
- {fsspec-2024.3.0.dist-info → fsspec-2024.5.0.dist-info/licenses}/LICENSE +0 -0
fsspec/implementations/tests/test_webhdfs.py
ADDED
@@ -0,0 +1,197 @@
+import pickle
+import shlex
+import subprocess
+import time
+
+import pytest
+
+import fsspec
+
+requests = pytest.importorskip("requests")
+
+from fsspec.implementations.webhdfs import WebHDFS  # noqa: E402
+
+
+@pytest.fixture(scope="module")
+def hdfs_cluster():
+    cmd0 = shlex.split("htcluster shutdown")
+    try:
+        subprocess.check_output(cmd0, stderr=subprocess.STDOUT)
+    except FileNotFoundError:
+        pytest.skip("htcluster not found")
+    except subprocess.CalledProcessError as ex:
+        pytest.skip(f"htcluster failed: {ex.output.decode()}")
+    cmd1 = shlex.split("htcluster startup --image base")
+    subprocess.check_output(cmd1)
+    try:
+        while True:
+            t = 90
+            try:
+                requests.get("http://localhost:50070/webhdfs/v1/?op=LISTSTATUS")
+            except:  # noqa: E722
+                t -= 1
+                assert t > 0, "Timeout waiting for HDFS"
+                time.sleep(1)
+                continue
+            break
+        time.sleep(7)
+        yield "localhost"
+    finally:
+        subprocess.check_output(cmd0)
+
+
+def test_pickle(hdfs_cluster):
+    w = WebHDFS(hdfs_cluster, user="testuser")
+    w2 = pickle.loads(pickle.dumps(w))
+    assert w == w2
+
+
+def test_simple(hdfs_cluster):
+    w = WebHDFS(hdfs_cluster, user="testuser")
+    home = w.home_directory()
+    assert home == "/user/testuser"
+    with pytest.raises(PermissionError):
+        w.mkdir("/root")
+
+
+def test_url(hdfs_cluster):
+    url = "webhdfs://testuser@localhost:50070/user/testuser/myfile"
+    fo = fsspec.open(url, "wb", data_proxy={"worker.example.com": "localhost"})
+    with fo as f:
+        f.write(b"hello")
+    fo = fsspec.open(url, "rb", data_proxy={"worker.example.com": "localhost"})
+    with fo as f:
+        assert f.read() == b"hello"
+
+
+def test_workflow(hdfs_cluster):
+    w = WebHDFS(
+        hdfs_cluster, user="testuser", data_proxy={"worker.example.com": "localhost"}
+    )
+    fn = "/user/testuser/testrun/afile"
+    w.mkdir("/user/testuser/testrun")
+    with w.open(fn, "wb") as f:
+        f.write(b"hello")
+    assert w.exists(fn)
+    info = w.info(fn)
+    assert info["size"] == 5
+    assert w.isfile(fn)
+    assert w.cat(fn) == b"hello"
+    w.rm("/user/testuser/testrun", recursive=True)
+    assert not w.exists(fn)
+
+
+def test_with_gzip(hdfs_cluster):
+    from gzip import GzipFile
+
+    w = WebHDFS(
+        hdfs_cluster, user="testuser", data_proxy={"worker.example.com": "localhost"}
+    )
+    fn = "/user/testuser/gzfile"
+    with w.open(fn, "wb") as f:
+        gf = GzipFile(fileobj=f, mode="w")
+        gf.write(b"hello")
+        gf.close()
+    with w.open(fn, "rb") as f:
+        gf = GzipFile(fileobj=f, mode="r")
+        assert gf.read() == b"hello"
+
+
+def test_workflow_transaction(hdfs_cluster):
+    w = WebHDFS(
+        hdfs_cluster, user="testuser", data_proxy={"worker.example.com": "localhost"}
+    )
+    fn = "/user/testuser/testrun/afile"
+    w.mkdirs("/user/testuser/testrun")
+    with w.transaction:
+        with w.open(fn, "wb") as f:
+            f.write(b"hello")
+        assert not w.exists(fn)
+    assert w.exists(fn)
+    assert w.ukey(fn)
+    files = w.ls("/user/testuser/testrun", True)
+    summ = w.content_summary("/user/testuser/testrun")
+    assert summ["length"] == files[0]["size"]
+    assert summ["fileCount"] == 1
+
+    w.rm("/user/testuser/testrun", recursive=True)
+    assert not w.exists(fn)
+
+
+def test_webhdfs_cp_file(hdfs_cluster):
+    fs = WebHDFS(
+        hdfs_cluster, user="testuser", data_proxy={"worker.example.com": "localhost"}
+    )
+
+    src, dst = "/user/testuser/testrun/f1", "/user/testuser/testrun/f2"
+
+    fs.mkdir("/user/testuser/testrun")
+
+    with fs.open(src, "wb") as f:
+        f.write(b"hello")
+
+    fs.cp_file(src, dst)
+
+    assert fs.exists(src)
+    assert fs.exists(dst)
+    assert fs.cat(src) == fs.cat(dst)
+
+
+def test_path_with_equals(hdfs_cluster):
+    fs = WebHDFS(
+        hdfs_cluster, user="testuser", data_proxy={"worker.example.com": "localhost"}
+    )
+    path_with_equals = "/user/testuser/some_table/datestamp=2023-11-11"
+
+    fs.mkdir(path_with_equals)
+
+    result = fs.ls(path_with_equals)
+    assert result is not None
+    assert fs.exists(path_with_equals)
+
+
+def test_error_handling_with_equals_in_path(hdfs_cluster):
+    fs = WebHDFS(hdfs_cluster, user="testuser")
+    invalid_path_with_equals = (
+        "/user/testuser/some_table/invalid_path=datestamp=2023-11-11"
+    )
+
+    with pytest.raises(FileNotFoundError):
+        fs.ls(invalid_path_with_equals)
+
+
+def test_create_and_touch_file_with_equals(hdfs_cluster):
+    fs = WebHDFS(
+        hdfs_cluster,
+        user="testuser",
+        data_proxy={"worker.example.com": "localhost"},
+    )
+    base_path = "/user/testuser/some_table/datestamp=2023-11-11"
+    file_path = f"{base_path}/testfile.txt"
+
+    fs.mkdir(base_path)
+    fs.touch(file_path, "wb")
+    assert fs.exists(file_path)
+
+
+def test_write_read_verify_file_with_equals(hdfs_cluster):
+    fs = WebHDFS(
+        hdfs_cluster,
+        user="testuser",
+        data_proxy={"worker.example.com": "localhost"},
+    )
+    base_path = "/user/testuser/some_table/datestamp=2023-11-11"
+    file_path = f"{base_path}/testfile.txt"
+    content = b"This is some content!"
+
+    fs.mkdir(base_path)
+    with fs.open(file_path, "wb") as f:
+        f.write(content)
+
+    with fs.open(file_path, "rb") as f:
+        assert f.read() == content
+
+    file_info = fs.ls(base_path, detail=True)
+    assert len(file_info) == 1
+    assert file_info[0]["name"] == file_path
+    assert file_info[0]["size"] == len(content)
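A note on the `data_proxy` argument used throughout these tests: WebHDFS reads and writes are redirected by the namenode to datanode URLs, and `data_proxy` (a dict or callable) rewrites those hostnames before the client follows the redirect. A minimal sketch, assuming a cluster whose namenode is reachable on localhost; the datanode hostname "worker.example.com" is illustrative:

    import fsspec

    # The dict maps hostnames appearing in namenode redirects to
    # hostnames the client can actually reach.
    fs = fsspec.filesystem(
        "webhdfs",
        host="localhost",
        user="testuser",
        data_proxy={"worker.example.com": "localhost"},
    )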
fsspec/implementations/tests/test_zip.py
ADDED
@@ -0,0 +1,134 @@
+import collections.abc
+import os.path
+
+import pytest
+
+import fsspec
+from fsspec.implementations.tests.test_archive import archive_data, tempzip
+
+
+def test_info():
+    with tempzip(archive_data) as z:
+        fs = fsspec.filesystem("zip", fo=z)
+
+        # Iterate over all files.
+        for f in archive_data:
+            lhs = fs.info(f)
+
+            # Probe some specific fields of Zip archives.
+            assert "CRC" in lhs
+            assert "compress_size" in lhs
+
+
+def test_fsspec_get_mapper():
+    """Added for #788"""
+
+    with tempzip(archive_data) as z:
+        mapping = fsspec.get_mapper(f"zip::{z}")
+
+        assert isinstance(mapping, collections.abc.Mapping)
+        keys = sorted(mapping.keys())
+        assert keys == ["a", "b", "deeply/nested/path"]
+
+        # mapping.getitems() will call FSMap.fs.cat()
+        # which was not accurately implemented for zip.
+        assert isinstance(mapping, fsspec.mapping.FSMap)
+        items = dict(mapping.getitems(keys))
+        assert items == {"a": b"", "b": b"hello", "deeply/nested/path": b"stuff"}
+
+
+def test_not_cached():
+    with tempzip(archive_data) as z:
+        fs = fsspec.filesystem("zip", fo=z)
+        fs2 = fsspec.filesystem("zip", fo=z)
+        assert fs is not fs2
+
+
+def test_root_info():
+    with tempzip(archive_data) as z:
+        fs = fsspec.filesystem("zip", fo=z)
+        assert fs.info("/") == {"name": "", "type": "directory", "size": 0}
+        assert fs.info("") == {"name": "", "type": "directory", "size": 0}
+
+
+def test_write_seek(m):
+    with m.open("afile.zip", "wb") as f:
+        fs = fsspec.filesystem("zip", fo=f, mode="w")
+        fs.pipe("another", b"hi")
+        fs.zip.close()
+
+    with m.open("afile.zip", "rb") as f:
+        fs = fsspec.filesystem("zip", fo=f)
+        assert fs.cat("another") == b"hi"
+
+
+def test_rw(m):
+    # extra arg to zip means "create archive"
+    with fsspec.open(
+        "zip://afile::memory://out.zip", mode="wb", zip={"mode": "w"}
+    ) as f:
+        f.write(b"data")
+
+    with fsspec.open("zip://afile::memory://out.zip", mode="rb") as f:
+        assert f.read() == b"data"
+
+
+def test_mapper(m):
+    # extra arg to zip means "create archive"
+    mapper = fsspec.get_mapper("zip::memory://out.zip", zip={"mode": "w"})
+    with pytest.raises(KeyError):
+        mapper["a"]
+
+    mapper["a"] = b"data"
+    with pytest.raises(OSError):
+        # fails because this is write mode and we cannot also read
+        mapper["a"]
+    assert "a" in mapper  # but we can list
+
+
+def test_zip_glob_star(m):
+    with fsspec.open(
+        "zip://adir/afile::memory://out.zip", mode="wb", zip={"mode": "w"}
+    ) as f:
+        f.write(b"data")
+
+    fs, _ = fsspec.core.url_to_fs("zip::memory://out.zip")
+    outfiles = fs.glob("*")
+    assert len(outfiles) == 1
+
+    fs = fsspec.filesystem("zip", fo="memory://out.zip", mode="w")
+    fs.mkdir("adir")
+    fs.pipe("adir/afile", b"data")
+    outfiles = fs.glob("*")
+    assert len(outfiles) == 1
+
+    fn = f"{os.path.dirname(os.path.abspath(__file__))}/out.zip"
+    fs = fsspec.filesystem("zip", fo=fn, mode="r")
+    outfiles = fs.glob("*")
+    assert len(outfiles) == 1
+
+
+def test_append(m, tmpdir):
+    fs = fsspec.filesystem("zip", fo="memory://out.zip", mode="w")
+    with fs.open("afile", "wb") as f:
+        f.write(b"data")
+    fs.close()
+
+    fs = fsspec.filesystem("zip", fo="memory://out.zip", mode="a")
+    with fs.open("bfile", "wb") as f:
+        f.write(b"data")
+    fs.close()
+
+    assert len(fsspec.open_files("zip://*::memory://out.zip")) == 2
+
+    fs = fsspec.filesystem("zip", fo=f"{tmpdir}/out.zip", mode="w")
+    with fs.open("afile", "wb") as f:
+        f.write(b"data")
+    fs.close()
+
+    fs = fsspec.filesystem("zip", fo=f"{tmpdir}/out.zip", mode="a")
+    with fs.open("bfile", "wb") as f:
+        f.write(b"data")
+    fs.close()
+
+    assert len(fsspec.open_files("zip://*::memory://out.zip")) == 2
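These tests lean on fsspec's chained-URL syntax: in "zip://afile::memory://out.zip", the path left of "::" is resolved inside the archive, the part right of it locates the archive itself, and a keyword dict named after a protocol (here zip={"mode": "w"}) is routed to that layer. A minimal sketch of the round trip, with an archive name chosen for illustration:

    import fsspec

    # Write one member into a fresh in-memory zip, then read it back.
    with fsspec.open(
        "zip://inner.txt::memory://demo.zip", "wb", zip={"mode": "w"}
    ) as f:
        f.write(b"payload")

    with fsspec.open("zip://inner.txt::memory://demo.zip", "rb") as f:
        assert f.read() == b"payload"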
fsspec/implementations/webhdfs.py
CHANGED
@@ -102,9 +102,7 @@ class WebHDFS(AbstractFileSystem):
         if self._cached:
             return
         super().__init__(**kwargs)
-        self.url = (
-            f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1"  # noqa
-        )
+        self.url = f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1"  # noqa
         self.kerb = kerberos
         self.kerb_kwargs = kerb_kwargs or {}
         self.pars = {}
fsspec/mapping.py
CHANGED
@@ -40,7 +40,7 @@ class FSMap(MutableMapping):
 
     def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
         self.fs = fs
-        self.root = fs._strip_protocol(root)
+        self.root = fs._strip_protocol(root).rstrip("/")
         self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
         if missing_exceptions is None:
             missing_exceptions = (
@@ -142,7 +142,7 @@ class FSMap(MutableMapping):
         if isinstance(key, list):
             key = tuple(key)
         key = str(key)
-        return f"{self._root_key_to_str}{key}"
+        return f"{self._root_key_to_str}{key}".rstrip("/")
 
     def _str_to_key(self, s):
         """Strip path of to leave key name"""
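The two rstrip("/") additions above guard the same translation: FSMap builds a target path by gluing the stored root prefix onto the key, so a root-level or empty key used to produce a path with a trailing slash, which backends such as zip cannot resolve. A standalone sketch of that translation (not the library's actual code; names are illustrative):

    root_key_to_str = "out.zip/"  # example root prefix, as stored internally

    def key_to_str(key: str) -> str:
        # After the change, a trailing slash is stripped so an empty key
        # resolves to the root itself rather than to "out.zip/".
        return f"{root_key_to_str}{key}".rstrip("/")

    assert key_to_str("a") == "out.zip/a"
    assert key_to_str("") == "out.zip"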
fsspec/parquet.py
CHANGED
@@ -162,7 +162,6 @@ def _get_parquet_byte_ranges(
 
     # Pass to specialized function if metadata is defined
     if metadata is not None:
-
         # Use the provided parquet metadata object
         # to avoid transferring/parsing footer metadata
         return _get_parquet_byte_ranges_from_metadata(
@@ -236,7 +235,6 @@ def _get_parquet_byte_ranges(
 
     # Calculate required byte ranges for each path
    for i, path in enumerate(paths):
-
         # Deal with small-file case.
         # Just include all remaining bytes of the file
         # in a single range.
@@ -347,7 +345,6 @@ def _add_header_magic(data):
 
 
 def _set_engine(engine_str):
-
     # Define a list of parquet engines to try
     if engine_str == "auto":
         try_engines = ("fastparquet", "pyarrow")
@@ -383,7 +380,6 @@ def _set_engine(engine_str):
 
 
 class FastparquetEngine:
-
     # The purpose of the FastparquetEngine class is
     # to check if fastparquet can be imported (on initialization)
     # and to define a `_parquet_byte_ranges` method. In the
@@ -406,7 +402,6 @@ class FastparquetEngine:
         footer=None,
         footer_start=None,
     ):
-
         # Initialize offset ranges and define ParqetFile metadata
         pf = metadata
         data_paths, data_starts, data_ends = [], [], []
@@ -440,7 +435,6 @@ class FastparquetEngine:
             # Skip this row-group if we are targeting
            # specific row-groups
             if row_group_indices is None or r in row_group_indices:
-
                 # Find the target parquet-file path for `row_group`
                 fn = self._row_group_filename(row_group, pf)
 
@@ -471,7 +465,6 @@ class FastparquetEngine:
 
 
 class PyarrowEngine:
-
     # The purpose of the PyarrowEngine class is
     # to check if pyarrow can be imported (on initialization)
     # and to define a `_parquet_byte_ranges` method. In the
@@ -494,7 +487,6 @@ class PyarrowEngine:
         footer=None,
         footer_start=None,
     ):
-
         if metadata is not None:
             raise ValueError("metadata input not supported for PyarrowEngine")
 
fsspec/registry.py
CHANGED
@@ -214,6 +214,10 @@ known_implementations = {
     "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
 }
 
+assert list(known_implementations) == sorted(
+    known_implementations
+), "Not in alphabetical order"
+
 
 def get_filesystem_class(protocol):
     """Fetch named protocol implementation from the registry
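The new assertion makes the alphabetical ordering of known_implementations self-checking at import time. Third-party backends do not edit that table; they register at runtime. A minimal sketch, reusing the built-in memory filesystem under a hypothetical protocol name ("demo" is illustrative, not one fsspec defines):

    import fsspec
    from fsspec.implementations.memory import MemoryFileSystem

    # Register an implementation for the made-up "demo" protocol.
    fsspec.register_implementation("demo", MemoryFileSystem, clobber=True)
    fs = fsspec.filesystem("demo")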
fsspec/spec.py
CHANGED
@@ -358,8 +358,10 @@ class AbstractFileSystem(metaclass=_Cached):
             but contains nothing), None if not in cache.
         """
         parent = self._parent(path)
-
+        try:
             return self.dircache[path.rstrip("/")]
+        except KeyError:
+            pass
         try:
             files = [
                 f
@@ -1175,7 +1177,10 @@ class AbstractFileSystem(metaclass=_Cached):
         if path1 == path2:
             logger.debug("%s mv: The paths are the same, so no files were moved.", self)
         else:
-            self.copy(path1, path2, recursive=recursive, maxdepth=maxdepth)
+            # explicitly raise exception to prevent data corruption
+            self.copy(
+                path1, path2, recursive=recursive, maxdepth=maxdepth, onerror="raise"
+            )
         self.rm(path1, recursive=recursive)
 
     def rm_file(self, path):
@@ -1693,7 +1698,12 @@ class AbstractBufferedFile(io.IOBase):
         """Files are equal if they have the same checksum, only in read mode"""
         if self is other:
             return True
-        return self.mode == "rb" and other.mode == "rb" and hash(self) == hash(other)
+        return (
+            isinstance(other, type(self))
+            and self.mode == "rb"
+            and other.mode == "rb"
+            and hash(self) == hash(other)
+        )
 
     def commit(self):
         """Move from temp to final destination"""
@@ -1839,11 +1849,18 @@ class AbstractBufferedFile(io.IOBase):
             length = self.size - self.loc
         if self.closed:
             raise ValueError("I/O operation on closed file.")
-        logger.debug("%s read: %i - %i", self, self.loc, self.loc + length)
         if length == 0:
             # don't even bother calling fetch
             return b""
         out = self.cache._fetch(self.loc, self.loc + length)
+
+        logger.debug(
+            "%s read: %i - %i %s",
+            self,
+            self.loc,
+            self.loc + length,
+            self.cache._log_stats(),
+        )
         self.loc += len(out)
         return out
 
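The mv change above matters because mv is implemented as copy-then-delete; passing onerror="raise" makes a failed copy abort the move before the source is removed, rather than silently dropping files. Basic behaviour, sketched against the in-memory filesystem:

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe("/src/afile", b"data")
    fs.mv("/src/afile", "/dst/afile")  # copies first, then removes the source
    assert fs.cat("/dst/afile") == b"data"
    assert not fs.exists("/src/afile")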
fsspec/tests/__init__.py
ADDED
File without changes
fsspec/tests/abstract/mv.py
ADDED
@@ -0,0 +1,57 @@
+import os
+
+import pytest
+
+import fsspec
+
+
+def test_move_raises_error_with_tmpdir(tmpdir):
+    # Create a file in the temporary directory
+    source = tmpdir.join("source_file.txt")
+    source.write("content")
+
+    # Define a destination that simulates a protected or invalid path
+    destination = tmpdir.join("non_existent_directory/destination_file.txt")
+
+    # Instantiate the filesystem (assuming the local file system interface)
+    fs = fsspec.filesystem("file")
+
+    # Use the actual file paths as string
+    with pytest.raises(FileNotFoundError):
+        fs.mv(str(source), str(destination))
+
+
+@pytest.mark.parametrize("recursive", (True, False))
+def test_move_raises_error_with_tmpdir_permission(recursive, tmpdir):
+    # Create a file in the temporary directory
+    source = tmpdir.join("source_file.txt")
+    source.write("content")
+
+    # Create a protected directory (non-writable)
+    protected_dir = tmpdir.mkdir("protected_directory")
+    protected_path = str(protected_dir)
+
+    # Set the directory to read-only
+    if os.name == "nt":
+        os.system(f'icacls "{protected_path}" /deny Everyone:(W)')
+    else:
+        os.chmod(protected_path, 0o555)  # Sets the directory to read-only
+
+    # Define a destination inside the protected directory
+    destination = protected_dir.join("destination_file.txt")
+
+    # Instantiate the filesystem (assuming the local file system interface)
+    fs = fsspec.filesystem("file")
+
+    # Try to move the file to the read-only directory, expecting a permission error
+    with pytest.raises(PermissionError):
+        fs.mv(str(source), str(destination), recursive=recursive)
+
+    # Assert the file was not created in the destination
+    assert not os.path.exists(destination)
+
+    # Cleanup: Restore permissions so the directory can be cleaned up
+    if os.name == "nt":
+        os.system(f'icacls "{protected_path}" /remove:d Everyone')
+    else:
+        os.chmod(protected_path, 0o755)  # Restore write permission for cleanup