fsspec 2024.3.1__py3-none-any.whl → 2024.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86):
  1. fsspec/__init__.py +2 -3
  2. fsspec/_version.py +14 -19
  3. fsspec/caching.py +83 -14
  4. fsspec/compression.py +1 -0
  5. fsspec/core.py +32 -8
  6. fsspec/exceptions.py +1 -0
  7. fsspec/generic.py +1 -1
  8. fsspec/gui.py +1 -1
  9. fsspec/implementations/arrow.py +0 -2
  10. fsspec/implementations/cache_mapper.py +1 -2
  11. fsspec/implementations/cache_metadata.py +7 -7
  12. fsspec/implementations/dirfs.py +2 -2
  13. fsspec/implementations/http.py +9 -9
  14. fsspec/implementations/local.py +78 -45
  15. fsspec/implementations/memory.py +9 -0
  16. fsspec/implementations/smb.py +3 -1
  17. fsspec/implementations/tests/__init__.py +0 -0
  18. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +112 -0
  19. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +582 -0
  20. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +873 -0
  21. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +458 -0
  22. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +1355 -0
  23. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +795 -0
  24. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +613 -0
  25. fsspec/implementations/tests/conftest.py +39 -0
  26. fsspec/implementations/tests/local/__init__.py +0 -0
  27. fsspec/implementations/tests/local/local_fixtures.py +18 -0
  28. fsspec/implementations/tests/local/local_test.py +14 -0
  29. fsspec/implementations/tests/memory/__init__.py +0 -0
  30. fsspec/implementations/tests/memory/memory_fixtures.py +27 -0
  31. fsspec/implementations/tests/memory/memory_test.py +14 -0
  32. fsspec/implementations/tests/out.zip +0 -0
  33. fsspec/implementations/tests/test_archive.py +382 -0
  34. fsspec/implementations/tests/test_arrow.py +259 -0
  35. fsspec/implementations/tests/test_cached.py +1306 -0
  36. fsspec/implementations/tests/test_common.py +35 -0
  37. fsspec/implementations/tests/test_dask.py +29 -0
  38. fsspec/implementations/tests/test_data.py +20 -0
  39. fsspec/implementations/tests/test_dbfs.py +268 -0
  40. fsspec/implementations/tests/test_dirfs.py +588 -0
  41. fsspec/implementations/tests/test_ftp.py +178 -0
  42. fsspec/implementations/tests/test_git.py +76 -0
  43. fsspec/implementations/tests/test_http.py +577 -0
  44. fsspec/implementations/tests/test_jupyter.py +57 -0
  45. fsspec/implementations/tests/test_libarchive.py +33 -0
  46. fsspec/implementations/tests/test_local.py +1285 -0
  47. fsspec/implementations/tests/test_memory.py +382 -0
  48. fsspec/implementations/tests/test_reference.py +720 -0
  49. fsspec/implementations/tests/test_sftp.py +233 -0
  50. fsspec/implementations/tests/test_smb.py +139 -0
  51. fsspec/implementations/tests/test_tar.py +243 -0
  52. fsspec/implementations/tests/test_webhdfs.py +197 -0
  53. fsspec/implementations/tests/test_zip.py +134 -0
  54. fsspec/implementations/webhdfs.py +1 -3
  55. fsspec/parquet.py +0 -8
  56. fsspec/registry.py +4 -0
  57. fsspec/spec.py +21 -4
  58. fsspec/tests/__init__.py +0 -0
  59. fsspec/tests/abstract/mv.py +57 -0
  60. fsspec/tests/conftest.py +188 -0
  61. fsspec/tests/data/listing.html +1 -0
  62. fsspec/tests/test_api.py +498 -0
  63. fsspec/tests/test_async.py +230 -0
  64. fsspec/tests/test_caches.py +255 -0
  65. fsspec/tests/test_callbacks.py +89 -0
  66. fsspec/tests/test_compression.py +164 -0
  67. fsspec/tests/test_config.py +129 -0
  68. fsspec/tests/test_core.py +466 -0
  69. fsspec/tests/test_downstream.py +40 -0
  70. fsspec/tests/test_file.py +200 -0
  71. fsspec/tests/test_fuse.py +147 -0
  72. fsspec/tests/test_generic.py +90 -0
  73. fsspec/tests/test_gui.py +23 -0
  74. fsspec/tests/test_mapping.py +228 -0
  75. fsspec/tests/test_parquet.py +140 -0
  76. fsspec/tests/test_registry.py +134 -0
  77. fsspec/tests/test_spec.py +1167 -0
  78. fsspec/tests/test_utils.py +478 -0
  79. fsspec/utils.py +0 -2
  80. fsspec-2024.5.0.dist-info/METADATA +273 -0
  81. fsspec-2024.5.0.dist-info/RECORD +111 -0
  82. {fsspec-2024.3.1.dist-info → fsspec-2024.5.0.dist-info}/WHEEL +1 -2
  83. fsspec-2024.3.1.dist-info/METADATA +0 -167
  84. fsspec-2024.3.1.dist-info/RECORD +0 -54
  85. fsspec-2024.3.1.dist-info/top_level.txt +0 -1
  86. {fsspec-2024.3.1.dist-info → fsspec-2024.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,188 @@
1
+ import contextlib
2
+ import gzip
3
+ import json
4
+ import os
5
+ import threading
6
+ from collections import ChainMap
7
+ from http.server import BaseHTTPRequestHandler, HTTPServer
8
+
9
+ import pytest
10
+
11
# Skip every test that needs this server when ``requests`` is not installed.
requests = pytest.importorskip("requests")
# Fixed TCP port the test HTTP server listens on.
port = 9898
# Payload served for every "real" file: 1000 newline-separated copies.
data = b"\n".join([b"some test data"] * 1000)
realfile = f"http://127.0.0.1:{port}/index/realfile"
# Minimal HTML page containing a single link to ``realfile``.
index = b'<a href="%s">Link</a>' % realfile.encode()
# Canned directory listing shipped next to this module; the context manager
# closes the handle instead of leaving that to the garbage collector.
with open(
    os.path.join(os.path.dirname(__file__), "data", "listing.html"), "rb"
) as _listing_file:
    listing = _listing_file.read()
# True when running on Windows.
win = os.name == "nt"
20
+
21
+
22
def _make_listing(*paths):
    """Render an HTML index with one anchor per entry of *paths*.

    Every path is appended to ``http://127.0.0.1:{port}`` and labelled
    ``Link_<position>``. Anchors are joined with newlines and the page is
    returned as bytes.
    """
    anchors = []
    for position, path in enumerate(paths):
        anchors.append(
            f'<a href="http://127.0.0.1:{port}{path}">Link_{position}</a>'
        )
    page = "\n".join(anchors)
    return page.encode()
27
+
28
+
29
@pytest.fixture
def reset_files():
    """Run the test, then forget every file it uploaded to the test server."""
    yield

    # Teardown: only the upload layer is emptied, the static files stay.
    uploaded = HTTPTestHandler.dynamic_files
    uploaded.clear()
36
+
37
+
38
class HTTPTestHandler(BaseHTTPRequestHandler):
    """Request handler backing the in-process HTTP server used by the tests.

    Request headers act as switches that select the shape of the response
    (for example ``give_length``, ``give_range``, ``use_206``, ``redirect``),
    so a single server can imitate differently-behaved remote hosts.
    """

    # Files that are always available.
    static_files = {
        "/index/realfile": data,
        "/index/otherfile": data,
        "/index": index,
        "/data/20020401": listing,
        "/simple/": _make_listing("/simple/file", "/simple/dir/"),
        "/simple/file": data,
        "/simple/dir/": _make_listing("/simple/dir/file"),
        "/simple/dir/file": data,
    }
    # Files created by POST/PUT requests (cleared by the ``reset_files`` fixture).
    dynamic_files = {}

    # Lookups see uploads first; ChainMap writes land in ``dynamic_files``.
    files = ChainMap(dynamic_files, static_files)

    def _respond(self, code=200, headers=None, data=b""):
        """Send status line, headers and (optionally) a body.

        ``headers`` is copied before the fixed ``User-Agent`` entry is added,
        so the mapping passed in by the caller is never modified.
        """
        headers = dict(headers or {})
        headers["User-Agent"] = "test"
        self.send_response(code)
        for key, value in headers.items():
            self.send_header(key, str(value))
        self.end_headers()
        if data:
            self.wfile.write(data)

    def do_GET(self):
        """Serve a stored file, honouring the behaviour-switch headers."""
        file_path = self.path
        # Allow a trailing slash when only the slash-less name is stored.
        if file_path.endswith("/") and file_path.rstrip("/") in self.files:
            file_path = file_path.rstrip("/")
        file_data = self.files.get(file_path)
        if "give_path" in self.headers:
            # Echo the requested path back as JSON instead of file content.
            return self._respond(200, data=json.dumps({"path": self.path}).encode())
        if "redirect" in self.headers and file_path != "/index/realfile":
            new_url = f"http://127.0.0.1:{port}/index/realfile"
            return self._respond(301, {"Location": new_url})
        if file_data is None:
            return self._respond(404)

        status = 200
        content_range = f"bytes 0-{len(file_data) - 1}/{len(file_data)}"
        if ("Range" in self.headers) and ("ignore_range" not in self.headers):
            # Header looks like "<unit>=<start>-<end>"; the unit is not used.
            _, byte_range = self.headers["Range"].split("=")
            start, end = byte_range.split("-")
            if start:
                content_range = f"bytes {start}-{end}/{len(file_data)}"
                file_data = file_data[int(start) : (int(end) + 1) if end else None]
            else:
                # suffix only: "-N" asks for the last N bytes
                total = len(file_data)
                content_range = f"bytes {total - int(end)}-{total - 1}/{total}"
                file_data = file_data[-int(end) :]
            if "use_206" in self.headers:
                status = 206
        if "give_length" in self.headers:
            if "gzip_encoding" in self.headers:
                file_data = gzip.compress(file_data)
                response_headers = {
                    "Content-Length": len(file_data),
                    "Content-Encoding": "gzip",
                }
            else:
                response_headers = {"Content-Length": len(file_data)}
            self._respond(status, response_headers, file_data)
        elif "give_range" in self.headers:
            self._respond(status, {"Content-Range": content_range}, file_data)
        elif "give_mimetype" in self.headers:
            self._respond(
                status, {"Content-Type": "text/html; charset=utf-8"}, file_data
            )
        else:
            self._respond(status, data=file_data)

    def do_POST(self):
        """Store the request body under the request path (slash-stripped)."""
        length = self.headers.get("Content-Length")
        file_path = self.path.rstrip("/")
        if length is None:
            assert self.headers.get("Transfer-Encoding") == "chunked"
            self.files[file_path] = b"".join(self.read_chunks())
        else:
            # Header values are strings; ``read`` requires an integer size.
            self.files[file_path] = self.rfile.read(int(length))
        self._respond(200)

    do_PUT = do_POST

    def read_chunks(self):
        """Yield the pieces of a chunked request body until the zero chunk."""
        length = -1
        while length != 0:
            line = self.rfile.readline().strip()
            if len(line) == 0:
                length = 0
            else:
                # Chunk sizes are hexadecimal.
                length = int(line, 16)
            yield self.rfile.read(length)
            # Consume the line terminator that follows each chunk.
            self.rfile.readline()

    def do_HEAD(self):
        """Answer HEAD requests; what is reported depends on request headers."""
        if "head_not_auth" in self.headers:
            return self._respond(
                403, {"Content-Length": 123}, b"not authorized for HEAD request"
            )
        elif "head_ok" not in self.headers:
            # HEAD is refused unless the client opts in with ``head_ok``.
            return self._respond(405)

        file_path = self.path.rstrip("/")
        file_data = self.files.get(file_path)
        if file_data is None:
            return self._respond(404)

        if ("give_length" in self.headers) or ("head_give_length" in self.headers):
            response_headers = {"Content-Length": len(file_data)}
            if "zero_length" in self.headers:
                response_headers["Content-Length"] = 0
            elif "gzip_encoding" in self.headers:
                file_data = gzip.compress(file_data)
                response_headers["Content-Encoding"] = "gzip"
                response_headers["Content-Length"] = len(file_data)

            self._respond(200, response_headers)
        elif "give_range" in self.headers:
            self._respond(
                200, {"Content-Range": f"0-{len(file_data) - 1}/{len(file_data)}"}
            )
        elif "give_etag" in self.headers:
            self._respond(200, {"ETag": "xxx"})
        else:
            self._respond(200)  # OK response, but no useful info
168
+
169
+
170
@contextlib.contextmanager
def serve():
    """Run the test HTTP server on a daemon thread for the ``with`` body.

    Yields the base URL of the server. On exit the serve loop is stopped,
    the thread is joined, and only then is the listening socket closed.
    """
    server_address = ("", port)
    httpd = HTTPServer(server_address, HTTPTestHandler)
    th = threading.Thread(target=httpd.serve_forever)
    th.daemon = True
    th.start()
    try:
        yield f"http://127.0.0.1:{port}"
    finally:
        # Stop the loop before touching the socket: closing it while
        # ``serve_forever`` is still polling can raise in the server thread.
        httpd.shutdown()
        th.join()
        httpd.server_close()
183
+
184
+
185
@pytest.fixture(scope="module")
def server():
    """Module-scoped fixture yielding the base URL of a running test server."""
    with serve() as base_url:
        yield base_url
@@ -0,0 +1 @@
1
+ \n<html><head><title>nasagrace.unl.edu - /data/20020401/</title></head><body><H1>nasagrace.unl.edu - /data/20020401/</H1><hr>\n\n<pre><A HREF="/data/">[To Parent Directory]</A><br><br> 1/27/2020 9:54 AM 1194073 <A HREF="/data/20020401/GRACE_GWS_20020401.pdf">GRACE_GWS_20020401.pdf</A><br> 1/27/2020 9:54 AM 380043 <A HREF="/data/20020401/GRACE_GWS_20020401.png">GRACE_GWS_20020401.png</A><br> 1/27/2020 9:54 AM 1192987 <A HREF="/data/20020401/GRACE_RTZSM_20020401.pdf">GRACE_RTZSM_20020401.pdf</A><br> 1/27/2020 9:54 AM 384342 <A HREF="/data/20020401/GRACE_RTZSM_20020401.png">GRACE_RTZSM_20020401.png</A><br> 1/27/2020 9:55 AM 1202046 <A HREF="/data/20020401/GRACE_SFSM_20020401.pdf">GRACE_SFSM_20020401.pdf</A><br> 1/27/2020 9:55 AM 387932 <A HREF="/data/20020401/GRACE_SFSM_20020401.png">GRACE_SFSM_20020401.png</A><br> 1/27/2020 9:54 AM 4975980 <A HREF="/data/20020401/GRACEDADM_CLSM0125US_7D.A20020401.030.nc4">GRACEDADM_CLSM0125US_7D.A20020401.030.nc4</A><br> 1/27/2020 9:54 AM 345640 <A HREF="/data/20020401/gws_perc_0125deg_US_20020401.img">gws_perc_0125deg_US_20020401.img</A><br> 1/27/2020 9:54 AM 2272 <A HREF="/data/20020401/gws_perc_0125deg_US_20020401.img.aux.xml">gws_perc_0125deg_US_20020401.img.aux.xml</A><br> 1/27/2020 9:54 AM 5678 <A HREF="/data/20020401/gws_perc_0125deg_US_20020401.img.xml">gws_perc_0125deg_US_20020401.img.xml</A><br> 1/27/2020 9:54 AM 136081 <A HREF="/data/20020401/gws_perc_0125deg_US_20020401.rrd">gws_perc_0125deg_US_20020401.rrd</A><br> 1/27/2020 9:54 AM 83
@@ -0,0 +1,498 @@
1
+ """Tests the spec, using memoryfs"""
2
+
3
+ import contextlib
4
+ import os
5
+ import pickle
6
+ import tempfile
7
+ from unittest.mock import Mock
8
+
9
+ import pytest
10
+
11
+ import fsspec
12
+ from fsspec.implementations.memory import MemoryFile, MemoryFileSystem
13
+
14
+
15
def test_idempotent():
    """Two constructions give the same cached object until the cache is cleared."""
    MemoryFileSystem.clear_instance_cache()
    first = MemoryFileSystem()
    second = MemoryFileSystem()
    assert first is second
    assert MemoryFileSystem.current() is second

    MemoryFileSystem.clear_instance_cache()
    assert not MemoryFileSystem._cache

    # An instance created after the clear still compares equal to the old one.
    recreated = MemoryFileSystem().current()
    assert first == recreated
27
+
28
+
29
def test_pickle():
    """A pickled and restored filesystem compares equal to the original."""
    original = MemoryFileSystem()
    payload = pickle.dumps(original)
    restored = pickle.loads(payload)
    assert original == restored
33
+
34
+
35
def test_class_methods():
    """Check protocol stripping and URL-kwarg extraction on the class itself."""
    expected_paths = [
        ("memory://stuff", "/stuff"),
        ("stuff", "/stuff"),
        # A foreign protocol is left untouched.
        ("other://stuff", "other://stuff"),
    ]
    for url, stripped in expected_paths:
        assert MemoryFileSystem._strip_protocol(url) == stripped

    url_kwargs = MemoryFileSystem._get_kwargs_from_urls("memory://user@thing")
    assert url_kwargs == {}
41
+
42
+
43
def test_multi(m):
    """Passing the same path twice yields two entries in the resolved paths."""
    m.pipe("/afile", b"data")
    duplicated = ["/afile", "/afile"]
    _fs, _token, resolved = fsspec.core.get_fs_token_paths(duplicated)
    assert len(resolved) == 2
47
+
48
+
49
def test_get_put(tmpdir, m):
    """Round-trip single files and whole trees between local disk and memory.

    Local files are always read and written inside ``with`` blocks so no
    handle is still open when the files are removed further down.
    """

    def write_bytes(path, payload):
        # Create/overwrite a local file and close it immediately.
        with open(path, "wb") as handle:
            handle.write(payload)

    def read_bytes(path):
        # Return the full content of a local file, closing it afterwards.
        with open(path, "rb") as handle:
            return handle.read()

    @contextlib.contextmanager
    def tmp_chdir(path):
        # Temporarily switch the working directory, restoring it on exit.
        curdir = os.getcwd()
        os.chdir(path)
        try:
            yield
        finally:
            os.chdir(curdir)

    tmpdir = str(tmpdir)
    fn = os.path.join(tmpdir, "one")
    write_bytes(fn, b"one")
    os.mkdir(os.path.join(tmpdir, "dir"))
    fn2 = os.path.join(tmpdir, "dir", "two")
    write_bytes(fn2, b"two")

    fs = MemoryFileSystem()
    fs.put(fn, "/afile")
    assert fs.cat("/afile") == b"one"

    fs.store["/bfile"] = MemoryFile(fs, "/bfile", b"data")
    fn3 = os.path.join(tmpdir, "three")
    fs.get("/bfile", fn3)
    assert read_bytes(fn3) == b"data"

    fs.put(tmpdir, "/more", recursive=True)
    assert fs.find("/more") == ["/more/dir/two", "/more/one", "/more/three"]

    # Same upload, addressed by a path relative to the parent directory.
    with tmp_chdir(os.path.join(tmpdir, os.path.pardir)):
        fs.put(os.path.basename(tmpdir), "/moretwo", recursive=True)
        assert fs.find("/moretwo") == [
            "/moretwo/dir/two",
            "/moretwo/one",
            "/moretwo/three",
        ]

    # Same upload, addressed as the current directory.
    with tmp_chdir(tmpdir):
        fs.put(os.path.curdir, "/morethree", recursive=True)
        assert fs.find("/morethree") == [
            "/morethree/dir/two",
            "/morethree/one",
            "/morethree/three",
        ]

    # Wipe the local tree, then restore it from the in-memory copy.
    for f in [fn, fn2, fn3]:
        os.remove(f)
    os.rmdir(os.path.join(tmpdir, "dir"))

    fs.get("/more/", tmpdir + "/", recursive=True)
    assert read_bytes(fn3) == b"data"
    assert read_bytes(fn) == b"one"
101
+
102
+
103
def test_du(m):
    """Exercise ``du`` with the ``total``, ``withdirs`` and ``maxdepth`` options."""
    fs = MemoryFileSystem()
    for stored_path, payload in (
        ("/dir/afile", b"a"),
        ("/dir/dirb/afile", b"bb"),
        ("/dir/dirb/bfile", b"ccc"),
    ):
        fs.store[stored_path] = MemoryFile(fs, "/afile", payload)

    assert fs.du("/dir") == 6

    per_file = fs.du("/dir", total=False)
    assert per_file == {
        "/dir/afile": 1,
        "/dir/dirb/afile": 2,
        "/dir/dirb/bfile": 3,
    }

    assert fs.du("/dir", withdirs=True) == 6

    per_entry = fs.du("/dir", total=False, withdirs=True)
    assert per_entry == {
        "/dir": 0,
        "/dir/afile": 1,
        "/dir/dirb": 0,
        "/dir/dirb/afile": 2,
        "/dir/dirb/bfile": 3,
    }

    # maxdepth=0 is expected to be rejected
    with pytest.raises(ValueError):
        assert fs.du("/dir", maxdepth=0) == 1

    shallow = fs.du("/dir", total=False, withdirs=True, maxdepth=1)
    assert shallow == {
        "/dir": 0,
        "/dir/afile": 1,
        "/dir/dirb": 0,
    }

    # Size of file only.
    single_file = "/dir/afile"
    assert fs.du(single_file) == 1
    assert fs.du(single_file, withdirs=True) == 1
137
+
138
+
139
def test_head_tail(m):
    """``head``/``tail`` return the first/last N bytes of a file."""
    fs = MemoryFileSystem()
    target = "/myfile"
    with fs.open(target, "wb") as handle:
        handle.write(b"I had a nice big cabbage")

    leading = fs.head(target, 5)
    trailing = fs.tail(target, 7)
    assert leading == b"I had"
    assert trailing == b"cabbage"
145
+
146
+
147
def test_move(m):
    """After ``move`` the source is gone and the target has info and a ukey."""
    fs = MemoryFileSystem()
    source, target = "/myfile", "/otherfile"
    with fs.open(source, "wb") as handle:
        handle.write(b"I had a nice big cabbage")

    fs.move(source, target)

    assert not fs.exists(source)
    assert fs.info(target)
    assert isinstance(fs.ukey(target), str)
155
+
156
+
157
def test_recursive_get_put(tmpdir, m):
    """Upload a nested tree, download it twice, and verify file contents.

    The downloaded files are now actually asserted on; previously the
    comparison result was computed and thrown away.
    """
    fs = MemoryFileSystem()
    os.makedirs(f"{tmpdir}/nest")
    for file in ["one", "two", "nest/other"]:
        with open(f"{tmpdir}/{file}", "wb") as f:
            f.write(b"data")

    fs.put(str(tmpdir), "test", recursive=True)

    # get to directory with slash
    d = tempfile.mkdtemp()
    fs.get("test/", d, recursive=True)
    for file in ["one", "two", "nest/other"]:
        with open(f"{d}/{file}", "rb") as f:
            assert f.read() == b"data"

    # get to directory without slash
    d = tempfile.mkdtemp()
    fs.get("test", d, recursive=True)
    for file in ["test/one", "test/two", "test/nest/other"]:
        with open(f"{d}/{file}", "rb") as f:
            assert f.read() == b"data"
179
+
180
+
181
def test_pipe_cat(m):
    """``pipe`` accepts one path or a mapping; ``cat`` reads either back."""
    fs = MemoryFileSystem()

    # single path
    fs.pipe("afile", b"contents")
    assert fs.cat("afile") == b"contents"

    # mapping of path -> bytes
    batch = {"/bfile": b"more", "/cfile": b"stuff"}
    fs.pipe(batch)
    fetched = fs.cat(list(batch))
    assert fetched == batch
189
+
190
+
191
def test_read_block_delimiter(m):
    """``read_block`` with a newline delimiter returns the expected slices."""
    fs = MemoryFileSystem()
    target = "/myfile"
    with fs.open(target, "wb") as handle:
        handle.write(b"some\nlines\nof\ntext")

    # (offset, length, expected bytes) for delimiter b"\n"
    delimited_cases = [
        (0, 2, b"some\n"),
        (2, 6, b"lines\n"),
        (6, 2, b""),
        (2, 9, b"lines\nof\n"),
        (12, 6, b"text"),
    ]
    for offset, length, expected in delimited_cases:
        assert fs.read_block(target, offset, length, b"\n") == expected

    # No length and no delimiter: the whole file comes back.
    assert fs.read_block(target, 0, None) == fs.cat(target)
201
+
202
+
203
def test_open_text(m):
    """A file opened in text mode reports the encoding it was opened with."""
    fs = MemoryFileSystem()
    with fs.open("/myfile", "wb") as writer:
        writer.write(b"some\nlines\nof\ntext")

    reader = fs.open("/myfile", "r", encoding="latin1")
    assert reader.encoding == "latin1"
209
+
210
+
211
def test_read_text(m):
    """Text written through a text-mode handle is returned by ``read_text``."""
    with m.open("/myfile", "w", encoding="utf-8") as writer:
        writer.write("some\nlines\nof\ntext")

    content = m.read_text("/myfile", encoding="utf-8")
    assert content == "some\nlines\nof\ntext"
215
+
216
+
217
def test_write_text(m):
    """``write_text`` followed by ``read_text`` returns the same string."""
    m.write_text("/myfile", "some\nlines\nof\ntext", encoding="utf-8")
    content = m.read_text("/myfile", encoding="utf-8")
    assert content == "some\nlines\nof\ntext"
220
+
221
+
222
def test_chained_fs():
    """Reading via ``simplecache::file://`` leaves a same-named cache copy."""
    source_dir = tempfile.mkdtemp()
    cache_dir = tempfile.mkdtemp()
    source_file = os.path.join(source_dir, "f1")
    with open(source_file, "wb") as handle:
        handle.write(b"test")

    cache_options = {"cache_storage": cache_dir, "same_names": True}
    of = fsspec.open(
        f"simplecache::file://{source_file}",
        simplecache=cache_options,
    )
    with of as handle:
        assert handle.read() == b"test"

    cached_names = os.listdir(cache_dir)
    assert cached_names == ["f1"]
237
+
238
+
239
@pytest.mark.xfail(reason="see issue #334", strict=True)
def test_multilevel_chained_fs():
    """This test reproduces fsspec/filesystem_spec#334"""
    import zipfile

    workdir = tempfile.mkdtemp()
    archive = os.path.join(workdir, "f1.zip")
    with zipfile.ZipFile(archive, mode="w") as z:
        # each member's content is its own file name
        for member in ("foo.txt", "bar.txt"):
            z.writestr(member, member)

    # We expected this to be the correct syntax
    with pytest.raises(IsADirectoryError):
        of = fsspec.open_files(f"zip://*.txt::simplecache::file://{archive}")
        assert len(of) == 2

    # But this is what is actually valid...
    of = fsspec.open_files(f"zip://*.txt::simplecache://{archive}::file://")

    assert len(of) == 2
    for open_file in of:
        with open_file as handle:
            text = handle.read().decode("utf-8")
            assert text == handle.name
263
+
264
+
265
def test_multilevel_chained_fs_zip_zip_file():
    """This test reproduces fsspec/filesystem_spec#334"""
    import zipfile

    workdir = tempfile.mkdtemp()
    inner_zip = os.path.join(workdir, "f1.zip")
    outer_zip = os.path.join(workdir, "f2.zip")

    with zipfile.ZipFile(inner_zip, mode="w") as z:
        # each member's content is its own file name
        for member in ("foo.txt", "bar.txt"):
            z.writestr(member, member)

    # Wrap the first archive inside a second one.
    with zipfile.ZipFile(outer_zip, mode="w") as z:
        with open(inner_zip, "rb") as raw:
            z.writestr("f1.zip", raw.read())

    # We expected this to be the correct syntax
    of = fsspec.open_files(f"zip://*.txt::zip://f1.zip::file://{outer_zip}")

    assert len(of) == 2
    for open_file in of:
        with open_file as handle:
            text = handle.read().decode("utf-8")
            assert text == handle.name
288
+
289
+
290
def test_chained_equivalent():
    """The chained URL and the explicit-kwargs spelling give equal filesystems."""
    source_dir = tempfile.mkdtemp()
    cache_dir = tempfile.mkdtemp()
    source_file = os.path.join(source_dir, "f1")
    with open(source_file, "wb") as handle:
        handle.write(b"test1")

    chained = fsspec.open(
        f"simplecache::file://{source_file}",
        simplecache={"cache_storage": cache_dir, "same_names": True},
    )
    explicit = fsspec.open(
        f"simplecache://{source_file}",
        cache_storage=cache_dir,
        same_names=True,
        target_protocol="file",
        target_options={},
    )
    # the following line passes by fluke - they are not quite the same instance,
    # since the parameters don't quite match. Also, the url understood by the two
    # of s are not the same (path gets munged a bit differently)
    assert chained.fs == explicit.fs
    assert hash(chained.fs) == hash(explicit.fs)
    assert chained.open().read() == explicit.open().read()
314
+
315
+
316
def test_chained_fs_multi():
    """``open_files`` through simplecache works for a glob and for a URL list."""
    source_dir = tempfile.mkdtemp()
    cache_dir = tempfile.mkdtemp()
    f1 = os.path.join(source_dir, "f1")
    f2 = os.path.join(source_dir, "f2")
    for local_path, payload in ((f1, b"test1"), (f2, b"test2")):
        with open(local_path, "wb") as handle:
            handle.write(payload)

    expected_payloads = (b"test1", b"test2")

    # glob form
    of = fsspec.open_files(
        f"simplecache::file://{source_dir}/*",
        simplecache={"cache_storage": cache_dir, "same_names": True},
    )
    for position, payload in enumerate(expected_payloads):
        with of[position] as handle:
            assert handle.read() == payload

    assert sorted(os.listdir(cache_dir)) == ["f1", "f2"]

    # explicit list form, with a fresh cache directory
    cache_dir = tempfile.mkdtemp()

    of = fsspec.open_files(
        [f"simplecache::file://{f1}", f"simplecache::file://{f2}"],
        simplecache={"cache_storage": cache_dir, "same_names": True},
    )
    for position, payload in enumerate(expected_payloads):
        with of[position] as handle:
            assert handle.read() == payload

    assert sorted(os.listdir(cache_dir)) == ["f1", "f2"]
349
+
350
+
351
def test_chained_fo():
    """Read a zip member through ``open``, ``open_files`` and simplecache."""
    import zipfile

    archive_dir = tempfile.mkdtemp()
    archive = os.path.join(archive_dir, "temp.zip")
    cache_dir = tempfile.mkdtemp()
    with zipfile.ZipFile(archive, mode="w") as z:
        z.writestr("afile", b"test")

    # one named member
    single = fsspec.open(f"zip://afile::file://{archive}")
    with single as handle:
        assert handle.read() == b"test"

    # glob over the archive
    globbed = fsspec.open_files(f"zip://*::file://{archive}")
    with globbed[0] as handle:
        assert handle.read() == b"test"

    # same glob, cached locally under the member's own name
    cached = fsspec.open_files(
        f"simplecache::zip://*::file://{archive}",
        simplecache={"cache_storage": cache_dir, "same_names": True},
    )
    with cached[0] as handle:
        assert handle.read() == b"test"
    assert "afile" in os.listdir(cache_dir)
375
+
376
+
377
def test_url_to_fs():
    """``url_to_fs`` returns a memory filesystem and the stripped path."""
    resolved = fsspec.core.url_to_fs("memory://a.txt")
    fs, stripped_path = resolved

    assert isinstance(fs, MemoryFileSystem)
    assert stripped_path == "/a.txt"
383
+
384
+
385
def test_walk(m):
    """Check ``walk`` ordering, ``maxdepth``, in-place ``dirs`` edits and ``on_error``."""
    # depth = 0
    dir1 = "/dir1"
    # depth = 1 (2 dirs, 1 file)
    dir11 = f"{dir1}/dir11"
    dir12 = f"{dir1}/dir12"
    file11 = f"{dir1}/file11"
    # depth = 2
    dir111 = f"{dir11}/dir111"
    file111 = f"{dir11}/file111"
    file121 = f"{dir12}/file121"
    # depth = 3
    file1111 = f"{dir111}/file1111"

    for directory in (dir111, dir12):
        m.mkdir(directory)  # Creates parents too
    for file_path in (file11, file111, file121, file1111):
        m.touch(file_path)

    # No maxdepth
    walked = list(m.walk(dir1, topdown=True))
    assert walked == [
        (dir1, ["dir11", "dir12"], ["file11"]),
        (dir11, ["dir111"], ["file111"]),
        (dir111, [], ["file1111"]),
        (dir12, [], ["file121"]),
    ]
    walked = list(m.walk(dir1, topdown=False))
    assert walked == [
        (dir111, [], ["file1111"]),
        (dir11, ["dir111"], ["file111"]),
        (dir12, [], ["file121"]),
        (dir1, ["dir11", "dir12"], ["file11"]),
    ]

    # maxdepth=2
    walked = list(m.walk(dir1, maxdepth=2, topdown=True))
    assert walked == [
        (dir1, ["dir11", "dir12"], ["file11"]),
        (dir11, ["dir111"], ["file111"]),
        (dir12, [], ["file121"]),
    ]
    walked = list(m.walk(dir1, maxdepth=2, topdown=False))
    assert walked == [
        (dir11, ["dir111"], ["file111"]),
        (dir12, [], ["file121"]),
        (dir1, ["dir11", "dir12"], ["file11"]),
    ]

    # maxdepth=1
    for topdown in (True, False):
        assert list(m.walk(dir1, maxdepth=1, topdown=topdown)) == [
            (dir1, ["dir11", "dir12"], ["file11"]),
        ]

    # maxdepth=0
    for topdown in (True, False):
        with pytest.raises(ValueError):
            list(m.walk(dir1, maxdepth=0, topdown=topdown))

    # prune dir111
    def _pruning_walk(*args, **kwargs):
        for path, dirs, files in m.walk(*args, **kwargs):
            # Snapshot ``dirs`` before editing the live list in place.
            yield (path, dirs.copy(), files)
            if "dir111" in dirs:
                dirs.remove("dir111")

    assert list(_pruning_walk(dir1, topdown=True)) == [
        (dir1, ["dir11", "dir12"], ["file11"]),
        (dir11, ["dir111"], ["file111"]),
        (dir12, [], ["file121"]),
    ]
    assert list(_pruning_walk(dir1, topdown=False)) == [
        (dir111, [], ["file1111"]),
        (dir11, ["dir111"], ["file111"]),
        (dir12, [], ["file121"]),
        (dir1, ["dir11", "dir12"], ["file11"]),
    ]

    # reverse dirs order
    def _reversing_walk(*args, **kwargs):
        for path, dirs, files in m.walk(*args, **kwargs):
            # Snapshot ``dirs`` before reversing the live list in place.
            yield (path, dirs.copy(), files)
            dirs.reverse()

    assert list(_reversing_walk(dir1, topdown=True)) == [
        (dir1, ["dir11", "dir12"], ["file11"]),
        # Here dir12 comes before dir11
        (dir12, [], ["file121"]),
        (dir11, ["dir111"], ["file111"]),
        (dir111, [], ["file1111"]),
    ]
    assert list(_reversing_walk(dir1, topdown=False)) == [
        (dir111, [], ["file1111"]),
        (dir11, ["dir111"], ["file111"]),
        (dir12, [], ["file121"]),
        (dir1, ["dir11", "dir12"], ["file11"]),
    ]

    missing = "do_not_exist"
    # on_error omit by default
    assert list(m.walk(missing)) == []
    # on_error omit
    assert list(m.walk(missing, on_error="omit")) == []
    # on_error raise
    with pytest.raises(FileNotFoundError):
        list(m.walk(missing, on_error="raise"))
    # on_error callable function
    mock = Mock()
    assert list(m.walk(missing, on_error=mock.onerror)) == []
    mock.onerror.assert_called()
    last_call = mock.onerror.call_args
    assert last_call.kwargs == {}
    assert len(last_call.args) == 1
    assert isinstance(last_call.args[0], FileNotFoundError)