fsspec 2024.3.1__py3-none-any.whl → 2024.5.0__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- fsspec/__init__.py +2 -3
- fsspec/_version.py +14 -19
- fsspec/caching.py +83 -14
- fsspec/compression.py +1 -0
- fsspec/core.py +32 -8
- fsspec/exceptions.py +1 -0
- fsspec/generic.py +1 -1
- fsspec/gui.py +1 -1
- fsspec/implementations/arrow.py +0 -2
- fsspec/implementations/cache_mapper.py +1 -2
- fsspec/implementations/cache_metadata.py +7 -7
- fsspec/implementations/dirfs.py +2 -2
- fsspec/implementations/http.py +9 -9
- fsspec/implementations/local.py +78 -45
- fsspec/implementations/memory.py +9 -0
- fsspec/implementations/smb.py +3 -1
- fsspec/implementations/tests/__init__.py +0 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +112 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +582 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +873 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +458 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +1355 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +795 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +613 -0
- fsspec/implementations/tests/conftest.py +39 -0
- fsspec/implementations/tests/local/__init__.py +0 -0
- fsspec/implementations/tests/local/local_fixtures.py +18 -0
- fsspec/implementations/tests/local/local_test.py +14 -0
- fsspec/implementations/tests/memory/__init__.py +0 -0
- fsspec/implementations/tests/memory/memory_fixtures.py +27 -0
- fsspec/implementations/tests/memory/memory_test.py +14 -0
- fsspec/implementations/tests/out.zip +0 -0
- fsspec/implementations/tests/test_archive.py +382 -0
- fsspec/implementations/tests/test_arrow.py +259 -0
- fsspec/implementations/tests/test_cached.py +1306 -0
- fsspec/implementations/tests/test_common.py +35 -0
- fsspec/implementations/tests/test_dask.py +29 -0
- fsspec/implementations/tests/test_data.py +20 -0
- fsspec/implementations/tests/test_dbfs.py +268 -0
- fsspec/implementations/tests/test_dirfs.py +588 -0
- fsspec/implementations/tests/test_ftp.py +178 -0
- fsspec/implementations/tests/test_git.py +76 -0
- fsspec/implementations/tests/test_http.py +577 -0
- fsspec/implementations/tests/test_jupyter.py +57 -0
- fsspec/implementations/tests/test_libarchive.py +33 -0
- fsspec/implementations/tests/test_local.py +1285 -0
- fsspec/implementations/tests/test_memory.py +382 -0
- fsspec/implementations/tests/test_reference.py +720 -0
- fsspec/implementations/tests/test_sftp.py +233 -0
- fsspec/implementations/tests/test_smb.py +139 -0
- fsspec/implementations/tests/test_tar.py +243 -0
- fsspec/implementations/tests/test_webhdfs.py +197 -0
- fsspec/implementations/tests/test_zip.py +134 -0
- fsspec/implementations/webhdfs.py +1 -3
- fsspec/parquet.py +0 -8
- fsspec/registry.py +4 -0
- fsspec/spec.py +21 -4
- fsspec/tests/__init__.py +0 -0
- fsspec/tests/abstract/mv.py +57 -0
- fsspec/tests/conftest.py +188 -0
- fsspec/tests/data/listing.html +1 -0
- fsspec/tests/test_api.py +498 -0
- fsspec/tests/test_async.py +230 -0
- fsspec/tests/test_caches.py +255 -0
- fsspec/tests/test_callbacks.py +89 -0
- fsspec/tests/test_compression.py +164 -0
- fsspec/tests/test_config.py +129 -0
- fsspec/tests/test_core.py +466 -0
- fsspec/tests/test_downstream.py +40 -0
- fsspec/tests/test_file.py +200 -0
- fsspec/tests/test_fuse.py +147 -0
- fsspec/tests/test_generic.py +90 -0
- fsspec/tests/test_gui.py +23 -0
- fsspec/tests/test_mapping.py +228 -0
- fsspec/tests/test_parquet.py +140 -0
- fsspec/tests/test_registry.py +134 -0
- fsspec/tests/test_spec.py +1167 -0
- fsspec/tests/test_utils.py +478 -0
- fsspec/utils.py +0 -2
- fsspec-2024.5.0.dist-info/METADATA +273 -0
- fsspec-2024.5.0.dist-info/RECORD +111 -0
- {fsspec-2024.3.1.dist-info → fsspec-2024.5.0.dist-info}/WHEEL +1 -2
- fsspec-2024.3.1.dist-info/METADATA +0 -167
- fsspec-2024.3.1.dist-info/RECORD +0 -54
- fsspec-2024.3.1.dist-info/top_level.txt +0 -1
- {fsspec-2024.3.1.dist-info → fsspec-2024.5.0.dist-info/licenses}/LICENSE +0 -0
fsspec/tests/conftest.py
ADDED
@@ -0,0 +1,188 @@
import contextlib
import gzip
import json
import os
import threading
from collections import ChainMap
from http.server import BaseHTTPRequestHandler, HTTPServer

import pytest

requests = pytest.importorskip("requests")
port = 9898
data = b"\n".join([b"some test data"] * 1000)
realfile = f"http://127.0.0.1:{port}/index/realfile"
index = b'<a href="%s">Link</a>' % realfile.encode()
listing = open(
    os.path.join(os.path.dirname(__file__), "data", "listing.html"), "rb"
).read()
win = os.name == "nt"


def _make_listing(*paths):
    return "\n".join(
        f'<a href="http://127.0.0.1:{port}{f}">Link_{i}</a>'
        for i, f in enumerate(paths)
    ).encode()


@pytest.fixture
def reset_files():
    yield

    # Reset the newly added files after the
    # test is completed.
    HTTPTestHandler.dynamic_files.clear()


class HTTPTestHandler(BaseHTTPRequestHandler):
    static_files = {
        "/index/realfile": data,
        "/index/otherfile": data,
        "/index": index,
        "/data/20020401": listing,
        "/simple/": _make_listing("/simple/file", "/simple/dir/"),
        "/simple/file": data,
        "/simple/dir/": _make_listing("/simple/dir/file"),
        "/simple/dir/file": data,
    }
    dynamic_files = {}

    files = ChainMap(dynamic_files, static_files)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _respond(self, code=200, headers=None, data=b""):
        headers = headers or {}
        headers.update({"User-Agent": "test"})
        self.send_response(code)
        for k, v in headers.items():
            self.send_header(k, str(v))
        self.end_headers()
        if data:
            self.wfile.write(data)

    def do_GET(self):
        file_path = self.path
        if file_path.endswith("/") and file_path.rstrip("/") in self.files:
            file_path = file_path.rstrip("/")
        file_data = self.files.get(file_path)
        if "give_path" in self.headers:
            return self._respond(200, data=json.dumps({"path": self.path}).encode())
        if "redirect" in self.headers and file_path != "/index/realfile":
            new_url = f"http://127.0.0.1:{port}/index/realfile"
            return self._respond(301, {"Location": new_url})
        if file_data is None:
            return self._respond(404)

        status = 200
        content_range = f"bytes 0-{len(file_data) - 1}/{len(file_data)}"
        if ("Range" in self.headers) and ("ignore_range" not in self.headers):
            ran = self.headers["Range"]
            b, ran = ran.split("=")
            start, end = ran.split("-")
            if start:
                content_range = f"bytes {start}-{end}/{len(file_data)}"
                file_data = file_data[int(start) : (int(end) + 1) if end else None]
            else:
                # suffix only
                l = len(file_data)
                content_range = f"bytes {l - int(end)}-{l - 1}/{l}"
                file_data = file_data[-int(end) :]
        if "use_206" in self.headers:
            status = 206
        if "give_length" in self.headers:
            if "gzip_encoding" in self.headers:
                file_data = gzip.compress(file_data)
                response_headers = {
                    "Content-Length": len(file_data),
                    "Content-Encoding": "gzip",
                }
            else:
                response_headers = {"Content-Length": len(file_data)}
            self._respond(status, response_headers, file_data)
        elif "give_range" in self.headers:
            self._respond(status, {"Content-Range": content_range}, file_data)
        elif "give_mimetype" in self.headers:
            self._respond(
                status, {"Content-Type": "text/html; charset=utf-8"}, file_data
            )
        else:
            self._respond(status, data=file_data)

    def do_POST(self):
        length = self.headers.get("Content-Length")
        file_path = self.path.rstrip("/")
        if length is None:
            assert self.headers.get("Transfer-Encoding") == "chunked"
            self.files[file_path] = b"".join(self.read_chunks())
        else:
            self.files[file_path] = self.rfile.read(int(length))
        self._respond(200)

    do_PUT = do_POST

    def read_chunks(self):
        length = -1
        while length != 0:
            line = self.rfile.readline().strip()
            if len(line) == 0:
                length = 0
            else:
                length = int(line, 16)
            yield self.rfile.read(length)
            self.rfile.readline()

    def do_HEAD(self):
        if "head_not_auth" in self.headers:
            return self._respond(
                403, {"Content-Length": 123}, b"not authorized for HEAD request"
            )
        elif "head_ok" not in self.headers:
            return self._respond(405)

        file_path = self.path.rstrip("/")
        file_data = self.files.get(file_path)
        if file_data is None:
            return self._respond(404)

        if ("give_length" in self.headers) or ("head_give_length" in self.headers):
            response_headers = {"Content-Length": len(file_data)}
            if "zero_length" in self.headers:
                response_headers["Content-Length"] = 0
            elif "gzip_encoding" in self.headers:
                file_data = gzip.compress(file_data)
                response_headers["Content-Encoding"] = "gzip"
                response_headers["Content-Length"] = len(file_data)

            self._respond(200, response_headers)
        elif "give_range" in self.headers:
            self._respond(
                200, {"Content-Range": f"0-{len(file_data) - 1}/{len(file_data)}"}
            )
        elif "give_etag" in self.headers:
            self._respond(200, {"ETag": "xxx"})
        else:
            self._respond(200)  # OK response, but no useful info


@contextlib.contextmanager
def serve():
    server_address = ("", port)
    httpd = HTTPServer(server_address, HTTPTestHandler)
    th = threading.Thread(target=httpd.serve_forever)
    th.daemon = True
    th.start()
    try:
        yield f"http://127.0.0.1:{port}"
    finally:
        httpd.socket.close()
        httpd.shutdown()
        th.join()


@pytest.fixture(scope="module")
def server():
    with serve() as s:
        yield s
fsspec/tests/data/listing.html
ADDED
@@ -0,0 +1 @@
\n<html><head><title>nasagrace.unl.edu - /data/20020401/</title></head><body><H1>nasagrace.unl.edu - /data/20020401/</H1><hr>\n\n<pre><A HREF="/data/">[To Parent Directory]</A><br><br> 1/27/2020 9:54 AM 1194073 <A HREF="/data/20020401/GRACE_GWS_20020401.pdf">GRACE_GWS_20020401.pdf</A><br> 1/27/2020 9:54 AM 380043 <A HREF="/data/20020401/GRACE_GWS_20020401.png">GRACE_GWS_20020401.png</A><br> 1/27/2020 9:54 AM 1192987 <A HREF="/data/20020401/GRACE_RTZSM_20020401.pdf">GRACE_RTZSM_20020401.pdf</A><br> 1/27/2020 9:54 AM 384342 <A HREF="/data/20020401/GRACE_RTZSM_20020401.png">GRACE_RTZSM_20020401.png</A><br> 1/27/2020 9:55 AM 1202046 <A HREF="/data/20020401/GRACE_SFSM_20020401.pdf">GRACE_SFSM_20020401.pdf</A><br> 1/27/2020 9:55 AM 387932 <A HREF="/data/20020401/GRACE_SFSM_20020401.png">GRACE_SFSM_20020401.png</A><br> 1/27/2020 9:54 AM 4975980 <A HREF="/data/20020401/GRACEDADM_CLSM0125US_7D.A20020401.030.nc4">GRACEDADM_CLSM0125US_7D.A20020401.030.nc4</A><br> 1/27/2020 9:54 AM 345640 <A HREF="/data/20020401/gws_perc_0125deg_US_20020401.img">gws_perc_0125deg_US_20020401.img</A><br> 1/27/2020 9:54 AM 2272 <A HREF="/data/20020401/gws_perc_0125deg_US_20020401.img.aux.xml">gws_perc_0125deg_US_20020401.img.aux.xml</A><br> 1/27/2020 9:54 AM 5678 <A HREF="/data/20020401/gws_perc_0125deg_US_20020401.img.xml">gws_perc_0125deg_US_20020401.img.xml</A><br> 1/27/2020 9:54 AM 136081 <A HREF="/data/20020401/gws_perc_0125deg_US_20020401.rrd">gws_perc_0125deg_US_20020401.rrd</A><br> 1/27/2020 9:54 AM 83
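
For orientation: the `server` fixture above yields the base URL of a throwaway threaded HTTP server, and `HTTPTestHandler` switches its behaviour on custom request headers such as `give_length`, `give_range`, and `give_path`. A minimal sketch of how a test might consume the fixture (illustrative only, not part of this diff; the test name is hypothetical):

# Hypothetical usage sketch of the `server` fixture defined in conftest.py above.
# `server` yields e.g. "http://127.0.0.1:9898"; the handler serves /index/realfile.
import requests


def test_realfile_roundtrip(server):
    expected = b"\n".join([b"some test data"] * 1000)  # matches `data` in conftest.py

    # Plain GET: the handler writes the body with no extra headers.
    r = requests.get(server + "/index/realfile")
    assert r.status_code == 200
    assert r.content == expected

    # "give_length" makes the handler advertise Content-Length as well.
    r = requests.get(server + "/index/realfile", headers={"give_length": "true"})
    assert int(r.headers["Content-Length"]) == len(expected)
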
fsspec/tests/test_api.py
ADDED
@@ -0,0 +1,498 @@
"""Tests the spec, using memoryfs"""

import contextlib
import os
import pickle
import tempfile
from unittest.mock import Mock

import pytest

import fsspec
from fsspec.implementations.memory import MemoryFile, MemoryFileSystem


def test_idempotent():
    MemoryFileSystem.clear_instance_cache()
    fs = MemoryFileSystem()
    fs2 = MemoryFileSystem()
    assert fs is fs2
    assert MemoryFileSystem.current() is fs2

    MemoryFileSystem.clear_instance_cache()
    assert not MemoryFileSystem._cache

    fs2 = MemoryFileSystem().current()
    assert fs == fs2


def test_pickle():
    fs = MemoryFileSystem()
    fs2 = pickle.loads(pickle.dumps(fs))
    assert fs == fs2


def test_class_methods():
    assert MemoryFileSystem._strip_protocol("memory://stuff") == "/stuff"
    assert MemoryFileSystem._strip_protocol("stuff") == "/stuff"
    assert MemoryFileSystem._strip_protocol("other://stuff") == "other://stuff"

    assert MemoryFileSystem._get_kwargs_from_urls("memory://user@thing") == {}


def test_multi(m):
    m.pipe("/afile", b"data")
    fs, token, paths = fsspec.core.get_fs_token_paths(["/afile", "/afile"])
    assert len(paths) == 2


def test_get_put(tmpdir, m):
    tmpdir = str(tmpdir)
    fn = os.path.join(tmpdir, "one")
    open(fn, "wb").write(b"one")
    os.mkdir(os.path.join(tmpdir, "dir"))
    fn2 = os.path.join(tmpdir, "dir", "two")
    open(fn2, "wb").write(b"two")

    fs = MemoryFileSystem()
    fs.put(fn, "/afile")
    assert fs.cat("/afile") == b"one"

    fs.store["/bfile"] = MemoryFile(fs, "/bfile", b"data")
    fn3 = os.path.join(tmpdir, "three")
    fs.get("/bfile", fn3)
    assert open(fn3, "rb").read() == b"data"

    fs.put(tmpdir, "/more", recursive=True)
    assert fs.find("/more") == ["/more/dir/two", "/more/one", "/more/three"]

    @contextlib.contextmanager
    def tmp_chdir(path):
        curdir = os.getcwd()
        os.chdir(path)
        try:
            yield
        finally:
            os.chdir(curdir)

    with tmp_chdir(os.path.join(tmpdir, os.path.pardir)):
        fs.put(os.path.basename(tmpdir), "/moretwo", recursive=True)
        assert fs.find("/moretwo") == [
            "/moretwo/dir/two",
            "/moretwo/one",
            "/moretwo/three",
        ]

    with tmp_chdir(tmpdir):
        fs.put(os.path.curdir, "/morethree", recursive=True)
        assert fs.find("/morethree") == [
            "/morethree/dir/two",
            "/morethree/one",
            "/morethree/three",
        ]

    for f in [fn, fn2, fn3]:
        os.remove(f)
    os.rmdir(os.path.join(tmpdir, "dir"))

    fs.get("/more/", tmpdir + "/", recursive=True)
    assert open(fn3, "rb").read() == b"data"
    assert open(fn, "rb").read() == b"one"


def test_du(m):
    fs = MemoryFileSystem()
    fs.store.update(
        {
            "/dir/afile": MemoryFile(fs, "/afile", b"a"),
            "/dir/dirb/afile": MemoryFile(fs, "/afile", b"bb"),
            "/dir/dirb/bfile": MemoryFile(fs, "/afile", b"ccc"),
        }
    )
    assert fs.du("/dir") == 6
    assert fs.du("/dir", total=False) == {
        "/dir/afile": 1,
        "/dir/dirb/afile": 2,
        "/dir/dirb/bfile": 3,
    }
    assert fs.du("/dir", withdirs=True) == 6
    assert fs.du("/dir", total=False, withdirs=True) == {
        "/dir": 0,
        "/dir/afile": 1,
        "/dir/dirb": 0,
        "/dir/dirb/afile": 2,
        "/dir/dirb/bfile": 3,
    }
    with pytest.raises(ValueError):
        assert fs.du("/dir", maxdepth=0) == 1
    assert fs.du("/dir", total=False, withdirs=True, maxdepth=1) == {
        "/dir": 0,
        "/dir/afile": 1,
        "/dir/dirb": 0,
    }

    # Size of file only.
    assert fs.du("/dir/afile") == 1
    assert fs.du("/dir/afile", withdirs=True) == 1


def test_head_tail(m):
    fs = MemoryFileSystem()
    with fs.open("/myfile", "wb") as f:
        f.write(b"I had a nice big cabbage")
    assert fs.head("/myfile", 5) == b"I had"
    assert fs.tail("/myfile", 7) == b"cabbage"


def test_move(m):
    fs = MemoryFileSystem()
    with fs.open("/myfile", "wb") as f:
        f.write(b"I had a nice big cabbage")
    fs.move("/myfile", "/otherfile")
    assert not fs.exists("/myfile")
    assert fs.info("/otherfile")
    assert isinstance(fs.ukey("/otherfile"), str)


def test_recursive_get_put(tmpdir, m):
    fs = MemoryFileSystem()
    os.makedirs(f"{tmpdir}/nest")
    for file in ["one", "two", "nest/other"]:
        with open(f"{tmpdir}/{file}", "wb") as f:
            f.write(b"data")

    fs.put(str(tmpdir), "test", recursive=True)

    # get to directory with slash
    d = tempfile.mkdtemp()
    fs.get("test/", d, recursive=True)
    for file in ["one", "two", "nest/other"]:
        with open(f"{d}/{file}", "rb") as f:
            f.read() == b"data"

    # get to directory without slash
    d = tempfile.mkdtemp()
    fs.get("test", d, recursive=True)
    for file in ["test/one", "test/two", "test/nest/other"]:
        with open(f"{d}/{file}", "rb") as f:
            f.read() == b"data"


def test_pipe_cat(m):
    fs = MemoryFileSystem()
    fs.pipe("afile", b"contents")
    assert fs.cat("afile") == b"contents"

    data = {"/bfile": b"more", "/cfile": b"stuff"}
    fs.pipe(data)
    assert fs.cat(list(data)) == data


def test_read_block_delimiter(m):
    fs = MemoryFileSystem()
    with fs.open("/myfile", "wb") as f:
        f.write(b"some\nlines\nof\ntext")
    assert fs.read_block("/myfile", 0, 2, b"\n") == b"some\n"
    assert fs.read_block("/myfile", 2, 6, b"\n") == b"lines\n"
    assert fs.read_block("/myfile", 6, 2, b"\n") == b""
    assert fs.read_block("/myfile", 2, 9, b"\n") == b"lines\nof\n"
    assert fs.read_block("/myfile", 12, 6, b"\n") == b"text"
    assert fs.read_block("/myfile", 0, None) == fs.cat("/myfile")


def test_open_text(m):
    fs = MemoryFileSystem()
    with fs.open("/myfile", "wb") as f:
        f.write(b"some\nlines\nof\ntext")
    f = fs.open("/myfile", "r", encoding="latin1")
    assert f.encoding == "latin1"


def test_read_text(m):
    with m.open("/myfile", "w", encoding="utf-8") as f:
        f.write("some\nlines\nof\ntext")
    assert m.read_text("/myfile", encoding="utf-8") == "some\nlines\nof\ntext"


def test_write_text(m):
    m.write_text("/myfile", "some\nlines\nof\ntext", encoding="utf-8")
    assert m.read_text("/myfile", encoding="utf-8") == "some\nlines\nof\ntext"


def test_chained_fs():
    d1 = tempfile.mkdtemp()
    d2 = tempfile.mkdtemp()
    f1 = os.path.join(d1, "f1")
    with open(f1, "wb") as f:
        f.write(b"test")

    of = fsspec.open(
        f"simplecache::file://{f1}",
        simplecache={"cache_storage": d2, "same_names": True},
    )
    with of as f:
        assert f.read() == b"test"

    assert os.listdir(d2) == ["f1"]


@pytest.mark.xfail(reason="see issue #334", strict=True)
def test_multilevel_chained_fs():
    """This test reproduces fsspec/filesystem_spec#334"""
    import zipfile

    d1 = tempfile.mkdtemp()
    f1 = os.path.join(d1, "f1.zip")
    with zipfile.ZipFile(f1, mode="w") as z:
        # filename, content
        z.writestr("foo.txt", "foo.txt")
        z.writestr("bar.txt", "bar.txt")

    # We expected this to be the correct syntax
    with pytest.raises(IsADirectoryError):
        of = fsspec.open_files(f"zip://*.txt::simplecache::file://{f1}")
        assert len(of) == 2

    # But this is what is actually valid...
    of = fsspec.open_files(f"zip://*.txt::simplecache://{f1}::file://")

    assert len(of) == 2
    for open_file in of:
        with open_file as f:
            assert f.read().decode("utf-8") == f.name


def test_multilevel_chained_fs_zip_zip_file():
    """This test reproduces fsspec/filesystem_spec#334"""
    import zipfile

    d1 = tempfile.mkdtemp()
    f1 = os.path.join(d1, "f1.zip")
    f2 = os.path.join(d1, "f2.zip")
    with zipfile.ZipFile(f1, mode="w") as z:
        # filename, content
        z.writestr("foo.txt", "foo.txt")
        z.writestr("bar.txt", "bar.txt")

    with zipfile.ZipFile(f2, mode="w") as z:
        with open(f1, "rb") as f:
            z.writestr("f1.zip", f.read())

    # We expected this to be the correct syntax
    of = fsspec.open_files(f"zip://*.txt::zip://f1.zip::file://{f2}")

    assert len(of) == 2
    for open_file in of:
        with open_file as f:
            assert f.read().decode("utf-8") == f.name


def test_chained_equivalent():
    d1 = tempfile.mkdtemp()
    d2 = tempfile.mkdtemp()
    f1 = os.path.join(d1, "f1")
    with open(f1, "wb") as f:
        f.write(b"test1")

    of = fsspec.open(
        f"simplecache::file://{f1}",
        simplecache={"cache_storage": d2, "same_names": True},
    )
    of2 = fsspec.open(
        f"simplecache://{f1}",
        cache_storage=d2,
        same_names=True,
        target_protocol="file",
        target_options={},
    )
    # the following line passes by fluke - they are not quite the same instance,
    # since the parameters don't quite match. Also, the url understood by the two
    # of s are not the same (path gets munged a bit differently)
    assert of.fs == of2.fs
    assert hash(of.fs) == hash(of2.fs)
    assert of.open().read() == of2.open().read()


def test_chained_fs_multi():
    d1 = tempfile.mkdtemp()
    d2 = tempfile.mkdtemp()
    f1 = os.path.join(d1, "f1")
    f2 = os.path.join(d1, "f2")
    with open(f1, "wb") as f:
        f.write(b"test1")
    with open(f2, "wb") as f:
        f.write(b"test2")

    of = fsspec.open_files(
        f"simplecache::file://{d1}/*",
        simplecache={"cache_storage": d2, "same_names": True},
    )
    with of[0] as f:
        assert f.read() == b"test1"
    with of[1] as f:
        assert f.read() == b"test2"

    assert sorted(os.listdir(d2)) == ["f1", "f2"]

    d2 = tempfile.mkdtemp()

    of = fsspec.open_files(
        [f"simplecache::file://{f1}", f"simplecache::file://{f2}"],
        simplecache={"cache_storage": d2, "same_names": True},
    )
    with of[0] as f:
        assert f.read() == b"test1"
    with of[1] as f:
        assert f.read() == b"test2"

    assert sorted(os.listdir(d2)) == ["f1", "f2"]


def test_chained_fo():
    import zipfile

    d1 = tempfile.mkdtemp()
    f1 = os.path.join(d1, "temp.zip")
    d3 = tempfile.mkdtemp()
    with zipfile.ZipFile(f1, mode="w") as z:
        z.writestr("afile", b"test")

    of = fsspec.open(f"zip://afile::file://{f1}")
    with of as f:
        assert f.read() == b"test"

    of = fsspec.open_files(f"zip://*::file://{f1}")
    with of[0] as f:
        assert f.read() == b"test"

    of = fsspec.open_files(
        f"simplecache::zip://*::file://{f1}",
        simplecache={"cache_storage": d3, "same_names": True},
    )
    with of[0] as f:
        assert f.read() == b"test"
    assert "afile" in os.listdir(d3)


def test_url_to_fs():
    url = "memory://a.txt"
    fs, url2 = fsspec.core.url_to_fs(url)

    assert isinstance(fs, MemoryFileSystem)
    assert url2 == "/a.txt"


def test_walk(m):
    # depth = 0
    dir1 = "/dir1"
    # depth = 1 (2 dirs, 1 file)
    dir11 = dir1 + "/dir11"
    dir12 = dir1 + "/dir12"
    file11 = dir1 + "/file11"
    # depth = 2
    dir111 = dir11 + "/dir111"
    file111 = dir11 + "/file111"
    file121 = dir12 + "/file121"
    # depth = 3
    file1111 = dir111 + "/file1111"

    m.mkdir(dir111)  # Creates parents too
    m.mkdir(dir12)  # Creates parents too
    m.touch(file11)
    m.touch(file111)
    m.touch(file121)
    m.touch(file1111)

    # No maxdepth
    assert list(m.walk(dir1, topdown=True)) == [
        (dir1, ["dir11", "dir12"], ["file11"]),
        (dir11, ["dir111"], ["file111"]),
        (dir111, [], ["file1111"]),
        (dir12, [], ["file121"]),
    ]
    assert list(m.walk(dir1, topdown=False)) == [
        (dir111, [], ["file1111"]),
        (dir11, ["dir111"], ["file111"]),
        (dir12, [], ["file121"]),
        (dir1, ["dir11", "dir12"], ["file11"]),
    ]

    # maxdepth=2
    assert list(m.walk(dir1, maxdepth=2, topdown=True)) == [
        (dir1, ["dir11", "dir12"], ["file11"]),
        (dir11, ["dir111"], ["file111"]),
        (dir12, [], ["file121"]),
    ]
    assert list(m.walk(dir1, maxdepth=2, topdown=False)) == [
        (dir11, ["dir111"], ["file111"]),
        (dir12, [], ["file121"]),
        (dir1, ["dir11", "dir12"], ["file11"]),
    ]

    # maxdepth=1
    assert list(m.walk(dir1, maxdepth=1, topdown=True)) == [
        (dir1, ["dir11", "dir12"], ["file11"]),
    ]
    assert list(m.walk(dir1, maxdepth=1, topdown=False)) == [
        (dir1, ["dir11", "dir12"], ["file11"]),
    ]

    # maxdepth=0
    with pytest.raises(ValueError):
        list(m.walk(dir1, maxdepth=0, topdown=True))
    with pytest.raises(ValueError):
        list(m.walk(dir1, maxdepth=0, topdown=False))

    # prune dir111
    def _walk(*args, **kwargs):
        for path, dirs, files in m.walk(*args, **kwargs):
            yield (path, dirs.copy(), files)
            if "dir111" in dirs:
                dirs.remove("dir111")

    assert list(_walk(dir1, topdown=True)) == [
        (dir1, ["dir11", "dir12"], ["file11"]),
        (dir11, ["dir111"], ["file111"]),
        (dir12, [], ["file121"]),
    ]
    assert list(_walk(dir1, topdown=False)) == [
        (dir111, [], ["file1111"]),
        (dir11, ["dir111"], ["file111"]),
        (dir12, [], ["file121"]),
        (dir1, ["dir11", "dir12"], ["file11"]),
    ]

    # reverse dirs order
    def _walk(*args, **kwargs):
        for path, dirs, files in m.walk(*args, **kwargs):
            yield (path, dirs.copy(), files)
            dirs.reverse()

    assert list(_walk(dir1, topdown=True)) == [
        (dir1, ["dir11", "dir12"], ["file11"]),
        # Here dir12 comes before dir11
        (dir12, [], ["file121"]),
        (dir11, ["dir111"], ["file111"]),
        (dir111, [], ["file1111"]),
    ]
    assert list(_walk(dir1, topdown=False)) == [
        (dir111, [], ["file1111"]),
        (dir11, ["dir111"], ["file111"]),
        (dir12, [], ["file121"]),
        (dir1, ["dir11", "dir12"], ["file11"]),
    ]

    # on_error omit by default
    assert list(m.walk("do_not_exist")) == []
    # on_error omit
    assert list(m.walk("do_not_exist", on_error="omit")) == []
    # on_error raise
    with pytest.raises(FileNotFoundError):
        list(m.walk("do_not_exist", on_error="raise"))
    # on_error callable function
    mock = Mock()
    assert list(m.walk("do_not_exist", on_error=mock.onerror)) == []
    mock.onerror.assert_called()
    assert mock.onerror.call_args.kwargs == {}
    assert len(mock.onerror.call_args.args) == 1
    assert isinstance(mock.onerror.call_args.args[0], FileNotFoundError)
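
Note that most of the tests above take an `m` fixture, which is not defined in the conftest.py shown in this diff; in fsspec it is supplied by a conftest elsewhere in the package. A minimal stand-in consistent with how the tests use it, assuming `m` is simply a cleared singleton MemoryFileSystem (a sketch, not necessarily the package's actual fixture):

# Sketch of an `m` fixture matching its usage in test_api.py above.
import pytest
from fsspec.implementations.memory import MemoryFileSystem


@pytest.fixture
def m():
    # MemoryFileSystem is a cached singleton with class-level state, so reset
    # its store and pseudo-directory list before and after each test.
    fs = MemoryFileSystem()
    fs.store.clear()
    fs.pseudo_dirs.clear()
    fs.pseudo_dirs.append("")
    try:
        yield fs
    finally:
        fs.store.clear()
        fs.pseudo_dirs.clear()
        fs.pseudo_dirs.append("")
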