fsspec 2024.5.0__py3-none-any.whl → 2024.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. fsspec/_version.py +2 -2
  2. fsspec/caching.py +3 -2
  3. fsspec/compression.py +1 -1
  4. fsspec/implementations/cached.py +1 -13
  5. fsspec/implementations/github.py +12 -0
  6. fsspec/implementations/reference.py +6 -0
  7. fsspec/implementations/smb.py +10 -0
  8. fsspec/json.py +81 -0
  9. fsspec/registry.py +24 -18
  10. fsspec/spec.py +76 -34
  11. fsspec/utils.py +1 -1
  12. {fsspec-2024.5.0.dist-info → fsspec-2024.6.0.dist-info}/METADATA +11 -5
  13. fsspec-2024.6.0.dist-info/RECORD +55 -0
  14. fsspec/implementations/tests/__init__.py +0 -0
  15. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +0 -112
  16. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +0 -582
  17. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +0 -873
  18. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +0 -458
  19. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +0 -1355
  20. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +0 -795
  21. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +0 -613
  22. fsspec/implementations/tests/conftest.py +0 -39
  23. fsspec/implementations/tests/local/__init__.py +0 -0
  24. fsspec/implementations/tests/local/local_fixtures.py +0 -18
  25. fsspec/implementations/tests/local/local_test.py +0 -14
  26. fsspec/implementations/tests/memory/__init__.py +0 -0
  27. fsspec/implementations/tests/memory/memory_fixtures.py +0 -27
  28. fsspec/implementations/tests/memory/memory_test.py +0 -14
  29. fsspec/implementations/tests/out.zip +0 -0
  30. fsspec/implementations/tests/test_archive.py +0 -382
  31. fsspec/implementations/tests/test_arrow.py +0 -259
  32. fsspec/implementations/tests/test_cached.py +0 -1306
  33. fsspec/implementations/tests/test_common.py +0 -35
  34. fsspec/implementations/tests/test_dask.py +0 -29
  35. fsspec/implementations/tests/test_data.py +0 -20
  36. fsspec/implementations/tests/test_dbfs.py +0 -268
  37. fsspec/implementations/tests/test_dirfs.py +0 -588
  38. fsspec/implementations/tests/test_ftp.py +0 -178
  39. fsspec/implementations/tests/test_git.py +0 -76
  40. fsspec/implementations/tests/test_http.py +0 -577
  41. fsspec/implementations/tests/test_jupyter.py +0 -57
  42. fsspec/implementations/tests/test_libarchive.py +0 -33
  43. fsspec/implementations/tests/test_local.py +0 -1285
  44. fsspec/implementations/tests/test_memory.py +0 -382
  45. fsspec/implementations/tests/test_reference.py +0 -720
  46. fsspec/implementations/tests/test_sftp.py +0 -233
  47. fsspec/implementations/tests/test_smb.py +0 -139
  48. fsspec/implementations/tests/test_tar.py +0 -243
  49. fsspec/implementations/tests/test_webhdfs.py +0 -197
  50. fsspec/implementations/tests/test_zip.py +0 -134
  51. fsspec/tests/__init__.py +0 -0
  52. fsspec/tests/conftest.py +0 -188
  53. fsspec/tests/data/listing.html +0 -1
  54. fsspec/tests/test_api.py +0 -498
  55. fsspec/tests/test_async.py +0 -230
  56. fsspec/tests/test_caches.py +0 -255
  57. fsspec/tests/test_callbacks.py +0 -89
  58. fsspec/tests/test_compression.py +0 -164
  59. fsspec/tests/test_config.py +0 -129
  60. fsspec/tests/test_core.py +0 -466
  61. fsspec/tests/test_downstream.py +0 -40
  62. fsspec/tests/test_file.py +0 -200
  63. fsspec/tests/test_fuse.py +0 -147
  64. fsspec/tests/test_generic.py +0 -90
  65. fsspec/tests/test_gui.py +0 -23
  66. fsspec/tests/test_mapping.py +0 -228
  67. fsspec/tests/test_parquet.py +0 -140
  68. fsspec/tests/test_registry.py +0 -134
  69. fsspec/tests/test_spec.py +0 -1167
  70. fsspec/tests/test_utils.py +0 -478
  71. fsspec-2024.5.0.dist-info/RECORD +0 -111
  72. {fsspec-2024.5.0.dist-info → fsspec-2024.6.0.dist-info}/WHEEL +0 -0
  73. {fsspec-2024.5.0.dist-info → fsspec-2024.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,233 +0,0 @@
1
- import os
2
- import shlex
3
- import subprocess
4
- import time
5
- from tarfile import TarFile
6
-
7
- import pytest
8
-
9
- import fsspec
10
-
11
- pytest.importorskip("paramiko")
12
-
13
-
14
def stop_docker(name):
    """Force-remove every docker container whose name matches ``name``.

    ``docker ps -a -q --filter name=...`` prints one container id per line and
    may print several, so the output is split into individual ids before being
    handed to ``docker rm``. Nothing is run when no container matches.
    """
    cmd = shlex.split(f'docker ps -a -q --filter "name={name}"')
    cids = subprocess.check_output(cmd).decode().split()
    if cids:
        # Pass each id as its own argument; a single newline-joined string
        # would be treated by docker as one (invalid) container reference.
        subprocess.call(["docker", "rm", "-f", *cids])
19
-
20
-
21
@pytest.fixture(scope="module")
def ssh():
    """Module-scoped fixture yielding connection kwargs for a dockerised sshd.

    The fixture first runs the ``hello-world`` image to check that docker is
    usable and skips the module when it is not. It then starts an
    ``ubuntu:16.04`` container publishing container port 22 on host port 9200,
    runs the setup commands inside it, and yields a dict with ``host``,
    ``port``, ``username`` and ``password``. The container is removed on
    teardown.
    """
    try:
        pchk = ["docker", "run", "--name", "fsspec_test_sftp", "hello-world"]
        subprocess.check_call(pchk)
        stop_docker("fsspec_test_sftp")
    except (subprocess.CalledProcessError, FileNotFoundError):
        # pytest.skip raises, so no explicit return is needed after it.
        pytest.skip("docker run not available")

    # requires docker
    cmds = [
        r"apt-get update",
        r"apt-get install -y openssh-server",
        r"mkdir /var/run/sshd",
        "bash -c \"echo 'root:pass' | chpasswd\"",
        (
            r"sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' "
            r"/etc/ssh/sshd_config"
        ),
        (
            r"sed 's@session\s*required\s*pam_loginuid.so@session optional "
            r"pam_loginuid.so@g' -i /etc/pam.d/sshd"
        ),
        r'bash -c "echo \"export VISIBLE=now\" >> /etc/profile"',
        r"/usr/sbin/sshd",
    ]
    name = "fsspec_sftp"
    stop_docker(name)
    run_cmd = f"docker run -d -p 9200:22 --name {name} ubuntu:16.04 sleep 9000"
    try:
        cid = subprocess.check_output(shlex.split(run_cmd)).strip().decode()
        # Distinct loop name so the launch command above is not overwritten.
        for setup_cmd in cmds:
            subprocess.call(["docker", "exec", cid] + shlex.split(setup_cmd))
        time.sleep(1)
        yield {
            "host": "localhost",
            "port": 9200,
            "username": "root",
            "password": "pass",
        }
    finally:
        stop_docker(name)
64
-
65
-
66
@pytest.fixture(scope="module")
def root_path():
    """Return the remote directory prefix (with trailing slash) used by the tests."""
    remote_prefix = "/home/someuser/"
    return remote_prefix
69
-
70
-
71
def test_simple(ssh, root_path):
    """Create a nested file over SFTP and check find/ls/info/exists/rm."""
    sftp_cls = fsspec.get_filesystem_class("sftp")
    sftp = sftp_cls(**ssh)
    target = root_path + "deeper/afile"
    sftp.mkdirs(root_path + "deeper")
    try:
        sftp.touch(target)
        assert sftp.find(root_path) == [target]
        assert sftp.ls(root_path + "deeper/") == [target]
        assert sftp.info(target)["type"] == "file"
        assert sftp.info(target)["size"] == 0
        assert sftp.exists(root_path)
    finally:
        # Always clean up the remote tree, then confirm it is gone.
        sftp.rm(root_path, recursive=True)
        assert not sftp.exists(root_path)
84
-
85
-
86
@pytest.mark.parametrize("protocol", ["sftp", "ssh"])
def test_with_url(protocol, ssh):
    """Write then read back a file addressed by a full ``sftp://``/``ssh://`` URL."""
    # The same URL is used for both the write and the read.
    url = protocol + "://{username}:{password}@{host}:{port}/home/someuserout".format(
        **ssh
    )

    writer = fsspec.open(url, "wb")
    with writer as out:
        out.write(b"hello")

    reader = fsspec.open(url, "rb")
    with reader as inp:
        assert inp.read() == b"hello"
102
-
103
-
104
@pytest.mark.parametrize("protocol", ["sftp", "ssh"])
def test_get_dir(protocol, ssh, root_path, tmpdir):
    """Recursively download a remote directory, by plain path and by full URL."""
    local_root = str(tmpdir)
    remote = fsspec.filesystem(protocol, **ssh)
    remote.mkdirs(root_path + "deeper", exist_ok=True)
    remote.touch(root_path + "deeper/afile")

    # First download: remote side given as a bare path.
    remote.get(root_path, local_root, recursive=True)
    assert os.path.isdir(f"{local_root}/deeper")
    assert os.path.isfile(f"{local_root}/deeper/afile")

    # Second download: remote side given as a URL including credentials.
    remote_url = protocol + "://{username}:{password}@{host}:{port}{root_path}".format(
        root_path=root_path, **ssh
    )
    second_root = f"{local_root}/test2"
    remote.get(remote_url, second_root, recursive=True)
    assert os.path.isdir(f"{second_root}/deeper")
    assert os.path.isfile(f"{second_root}/deeper/afile")
126
-
127
-
128
@pytest.fixture(scope="module")
def netloc(ssh):
    """Build the ``user:password@host:port`` part of a URL from the ``ssh`` kwargs.

    The credentials prefix is omitted when no username is present; a missing
    password or port is rendered as an empty string.
    """
    user = ssh.get("username")
    secret = ssh.get("password")
    hostname = ssh.get("host")
    portnum = ssh.get("port")

    if user is None:
        credentials = ""
    else:
        secret_part = "" if secret is None else secret
        credentials = f"{user}:{secret_part}@"

    port_part = "" if portnum is None else portnum
    return credentials + f"{hostname}:{port_part}"
141
-
142
-
143
def test_put_file(ssh, tmp_path, root_path):
    """Upload a small local text file with ``put_file``."""
    local_file = tmp_path / "a.txt"
    with open(local_file, mode="w") as handle:
        handle.write("blabla")

    sftp_cls = fsspec.get_filesystem_class("sftp")
    sftp = sftp_cls(**ssh)
    sftp.put_file(lpath=local_file, rpath=root_path + "a.txt")
150
-
151
-
152
def test_simple_with_tar(ssh, netloc, tmp_path, root_path):
    """Upload a tar archive over SFTP and list it through a chained ``tar::ssh`` URL."""
    files_to_pack = ["a.txt", "b.txt"]
    local_tar = make_tarfile(files_to_pack, tmp_path)

    sftp = fsspec.get_filesystem_class("sftp")(**ssh)
    sftp.mkdirs(f"{root_path}deeper", exist_ok=True)
    try:
        remote_tar = f"{root_path}deeper/somefile.tar"
        # Copy the local archive to the remote side in one read/write.
        with sftp.open(remote_tar, mode="wb") as dst, open(local_tar, mode="rb") as src:
            dst.write(src.read())

        tar_fs = fsspec.open(f"tar::ssh://{netloc}{remote_tar}").fs
        listed = tar_fs.find("/")
        assert listed == files_to_pack
    finally:
        sftp.rm(root_path, recursive=True)
169
-
170
-
171
def make_tarfile(files_to_pack, tmp_path):
    """Create ``sometarfile.tar`` in ``tmp_path`` holding empty files and return its path.

    Each name in ``files_to_pack`` is first written as an empty file under
    ``tmp_path`` and then added to the archive under that same name.
    """
    archive_path = tmp_path / "sometarfile.tar"

    # Materialise the (empty) member files on disk.
    for member_name in files_to_pack:
        with open(tmp_path / member_name, mode="w") as handle:
            handle.write("")

    # Pack them, storing each under its bare name rather than its full path.
    with TarFile(archive_path, mode="w") as archive:
        for member_name in files_to_pack:
            archive.add(tmp_path / member_name, arcname=member_name)

    return archive_path
181
-
182
-
183
def test_transaction(ssh, root_path):
    """Check that files touched in a transaction only appear once it ends.

    Covers both the explicit ``start_transaction``/``end_transaction`` calls
    and the ``transaction`` context manager.
    """
    sftp = fsspec.get_filesystem_class("sftp")(**ssh)
    first = root_path + "deeper/afile"
    second = root_path + "deeper/afile2"
    sftp.mkdirs(root_path + "deeper", exist_ok=True)
    try:
        # Explicit start/end.
        sftp.start_transaction()
        sftp.touch(first)
        assert sftp.find(root_path) == []
        sftp.end_transaction()
        assert sftp.find(root_path) == [first]

        # Context-manager form.
        with sftp.transaction:
            assert sftp._intrans
            sftp.touch(second)
            assert sftp.find(root_path) == [first]
        assert sftp.find(root_path) == [first, second]
    finally:
        sftp.rm(root_path, recursive=True)
203
-
204
-
205
@pytest.mark.parametrize("path", ["/a/b/c", "a/b/c"])
def test_mkdir_create_parent(ssh, path):
    """Exercise ``mkdir`` with and without parent creation, for both path forms."""
    sftp_cls = fsspec.get_filesystem_class("sftp")
    sftp = sftp_cls(**ssh)

    # Without create_parents the call is expected to raise.
    with pytest.raises(FileNotFoundError):
        sftp.mkdir(path, create_parents=False)

    sftp.mkdir(path)
    assert sftp.exists(path)

    # A second mkdir of the same path is expected to raise, naming the path.
    with pytest.raises(FileExistsError, match=path):
        sftp.mkdir(path)

    sftp.rm(path, recursive=True)
    assert not sftp.exists(path)
220
-
221
-
222
@pytest.mark.parametrize("path", ["/a/b/c", "a/b/c"])
def test_makedirs_exist_ok(ssh, path):
    """Check ``makedirs`` with ``exist_ok`` set both ways on an existing path."""
    sftp_cls = fsspec.get_filesystem_class("sftp")
    sftp = sftp_cls(**ssh)

    sftp.makedirs(path, exist_ok=False)

    # Repeating with exist_ok=False is expected to raise, naming the path.
    with pytest.raises(FileExistsError, match=path):
        sftp.makedirs(path, exist_ok=False)

    sftp.makedirs(path, exist_ok=True)
    sftp.rm(path, recursive=True)
    assert not sftp.exists(path)
@@ -1,139 +0,0 @@
1
- """
2
- Test SMBFileSystem class using a docker container
3
- """
4
-
5
- import logging
6
- import os
7
- import shlex
8
- import subprocess
9
- import time
10
-
11
- import pytest
12
-
13
- import fsspec
14
-
15
- pytest.importorskip("smbprotocol")
16
-
17
- # ruff: noqa: F821
18
-
19
# Port used when a test parameter does not name one. Defined unconditionally so
# the ``smb_params`` fixture can reference it whichever branch below is taken.
default_port = 445

if os.environ.get("WSL_INTEROP"):
    # Running on WSL (Windows)
    port_test = [9999]

else:
    # ! pylint: disable=redefined-outer-name,missing-function-docstring

    # Test standard and non-standard ports
    port_test = [None, default_port, 9999]
29
-
30
-
31
def stop_docker(container):
    """Force-remove every docker container (and its volumes) matching ``container``.

    ``docker ps -a -q --filter name=...`` prints one container id per line and
    may print several, so the output is split into individual ids before being
    handed to ``docker rm``. Nothing is run when no container matches.
    """
    cmd = shlex.split('docker ps -a -q --filter "name=%s"' % container)
    cids = subprocess.check_output(cmd).decode().split()
    if cids:
        # Pass each id as its own argument; a single newline-joined string
        # would be treated by docker as one (invalid) container reference.
        subprocess.call(["docker", "rm", "-f", "-v", *cids])
36
-
37
-
38
@pytest.fixture(scope="module", params=port_test)
def smb_params(request):
    """Module-scoped fixture yielding connection kwargs for a dockerised Samba server.

    Runs once per entry in ``port_test``. The module is skipped when the
    ``hello-world`` probe shows docker is unavailable. Otherwise a
    ``dperson/samba`` container is started with container port 445 published
    on the requested host port (``default_port`` when the parameter is None),
    and a kwargs dict is yielded. On teardown the smbclient connection cache
    is reset and the container removed.
    """
    probe = ["docker", "run", "--name", "fsspec_test_smb", "hello-world"]
    try:
        subprocess.check_call(probe)
        stop_docker("fsspec_test_smb")
    except (subprocess.CalledProcessError, FileNotFoundError):
        pytest.skip("docker run not available")

    # requires docker
    container = "fsspec_smb"
    stop_docker(container)

    requested_port = request.param
    if requested_port is None:
        host_port = default_port
    else:
        host_port = requested_port

    samba_args = "-p -u 'testuser;testpass' -s 'home;/share;no;no;no;testuser'"
    launch = (
        f"docker run --name {container} --detach "
        f"-p 139:139 -p {host_port}:445 dperson/samba "
        f"{samba_args}"
    )
    try:
        cid = subprocess.check_output(shlex.split(launch)).strip().decode()
        logging.getLogger("fsspec").debug("Container: %s", cid)
        time.sleep(1)
        # The yielded port is the raw parameter (possibly None), not host_port.
        params = {
            "host": "localhost",
            "port": requested_port,
            "username": "testuser",
            "password": "testpass",
            "register_session_retries": 100,  # max ~= 10 seconds
        }
        yield params
    finally:
        import smbclient  # pylint: disable=import-outside-toplevel

        smbclient.reset_connection_cache()
        stop_docker(container)
73
-
74
-
75
@pytest.mark.flaky(reruns=2, reruns_delay=2)
def test_simple(smb_params):
    """Create a nested file over SMB and check find/ls/info/exists/rm."""
    top_dir = "/home/adir"
    nested_dir = "/home/adir/otherdir/"
    target = "/home/adir/otherdir/afile"

    smb_cls = fsspec.get_filesystem_class("smb")
    smb = smb_cls(**smb_params)
    smb.mkdirs(nested_dir)
    smb.touch(target)

    assert smb.find(top_dir) == [target]
    assert smb.ls(nested_dir, detail=False) == [target]
    assert smb.info(target)["type"] == "file"
    assert smb.info(target)["size"] == 0
    assert smb.exists(top_dir)

    smb.rm(top_dir, recursive=True)
    assert not smb.exists(top_dir)
90
-
91
-
92
@pytest.mark.flaky(reruns=2, reruns_delay=2)
def test_with_url(smb_params):
    """Write then read back a file addressed by an ``smb://`` URL.

    The port is left out of the URL when the fixture's port is None.
    """
    if smb_params["port"] is not None:
        template = "smb://{username}:{password}@{host}:{port}/home/someuser.txt"
    else:
        template = "smb://{username}:{password}@{host}/home/someuser.txt"
    url = template.format(**smb_params)

    writer = fsspec.open(url, "wb")
    with writer as out:
        out.write(b"hello")

    reader = fsspec.open(url, "rb")
    with reader as inp:
        payload = inp.read()
        assert payload == b"hello"
105
-
106
-
107
@pytest.mark.flaky(reruns=2, reruns_delay=2)
def test_transaction(smb_params):
    """Check that files touched in a transaction only appear once it ends.

    Covers both the explicit ``start_transaction``/``end_transaction`` calls
    and the ``transaction`` context manager.
    """
    top_dir = "/home/afolder"
    nested_dir = "/home/afolder/otherdir"
    first = "/home/afolder/otherdir/afile"
    second = "/home/afolder/otherdir/afile2"

    smb = fsspec.get_filesystem_class("smb")(**smb_params)
    smb.mkdirs(nested_dir)

    # Explicit start/end.
    smb.start_transaction()
    smb.touch(first)
    assert smb.find(top_dir) == []
    smb.end_transaction()
    assert smb.find(top_dir) == [first]

    # Context-manager form.
    with smb.transaction:
        assert smb._intrans
        smb.touch(second)
        assert smb.find(top_dir) == [first]
    assert smb.find(top_dir) == [first, second]
126
-
127
-
128
@pytest.mark.flaky(reruns=2, reruns_delay=2)
def test_makedirs_exist_ok(smb_params):
    """Call ``makedirs`` twice on one path, the second time with ``exist_ok=True``."""
    nested = "/home/a/b/c"
    smb = fsspec.get_filesystem_class("smb")(**smb_params)
    smb.makedirs(nested)
    smb.makedirs(nested, exist_ok=True)
133
-
134
-
135
@pytest.mark.flaky(reruns=2, reruns_delay=2)
def test_rename_from_upath(smb_params):
    """Move a directory with ``mv`` while passing ``recursive`` and ``maxdepth`` explicitly."""
    source_dir = "/home/a/b/c"
    dest_dir = "/home/a/b/d"
    smb = fsspec.get_filesystem_class("smb")(**smb_params)
    smb.makedirs(source_dir, exist_ok=True)
    smb.mv(source_dir, dest_dir, recursive=False, maxdepth=None)
@@ -1,243 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import shutil
5
- import tarfile
6
- import tempfile
7
- from io import BytesIO
8
- from pathlib import Path
9
-
10
- import pytest
11
-
12
- import fsspec
13
- from fsspec.core import OpenFile
14
- from fsspec.implementations.cached import WholeFileCacheFileSystem
15
- from fsspec.implementations.tar import TarFileSystem
16
- from fsspec.implementations.tests.test_archive import archive_data, temptar
17
-
18
-
19
def test_info():
    """Check ``info()`` for every directory and file in the sample tar archive."""
    with temptar(archive_data) as tar_path:
        tarfs = fsspec.filesystem("tar", fo=tar_path)

        # Directories: compare the full info dict, ignoring the checksum.
        for dirname in tarfs._all_dirnames(archive_data.keys()):
            details = tarfs.info(dirname)
            del details["chksum"]
            assert details == {
                "name": f"{dirname}",
                "size": 0,
                "type": "directory",
                "devmajor": 0,
                "devminor": 0,
                "gname": "",
                "linkname": "",
                "uid": 0,
                "gid": 0,
                "mode": 420,
                "mtime": 0,
                "uname": "",
            }

        # Files: only check that the tar-specific fields are present.
        for member in archive_data:
            details = tarfs.info(member)
            for field in ("mode", "uid", "gid", "mtime", "chksum"):
                assert field in details
54
-
55
-
56
@pytest.mark.parametrize(
    "recipe",
    [
        {"mode": "w", "suffix": ".tar", "magic": b"a\x00\x00\x00\x00"},
        {"mode": "w:gz", "suffix": ".tar.gz", "magic": b"\x1f\x8b\x08\x08"},
        {"mode": "w:bz2", "suffix": ".tar.bz2", "magic": b"BZh91AY"},
        {"mode": "w:xz", "suffix": ".tar.xz", "magic": b"\xfd7zXZ\x00\x00"},
    ],
    ids=["tar", "tar-gz", "tar-bz2", "tar-xz"],
)
def test_compressions(recipe):
    """
    Open the sample archive in each tar compression variant and read one member.
    """
    tar_mode = recipe["mode"]
    tar_suffix = recipe["suffix"]
    with temptar(archive_data, mode=tar_mode, suffix=tar_suffix) as tar_path:
        tarfs = fsspec.filesystem("tar", fo=tar_path)

        # The file on disk must begin with the expected magic bytes.
        with open(tar_path, "rb") as raw:
            header = raw.read()[:10]
            assert header.startswith(recipe["magic"])

        # A sample member must be readable through the filesystem.
        assert tarfs.cat("b") == b"hello"
79
-
80
-
81
@pytest.mark.parametrize(
    "recipe",
    [
        {"mode": "w", "suffix": ".tar", "magic": b"a\x00\x00\x00\x00"},
        {"mode": "w:gz", "suffix": ".tar.gz", "magic": b"\x1f\x8b\x08\x08"},
        {"mode": "w:bz2", "suffix": ".tar.bz2", "magic": b"BZh91AY"},
        {"mode": "w:xz", "suffix": ".tar.xz", "magic": b"\xfd7zXZ\x00\x00"},
    ],
    ids=["tar", "tar-gz", "tar-bz2", "tar-xz"],
)
def test_filesystem_direct(recipe, tmpdir):
    """
    Run tests through a real fsspec filesystem implementation.
    Here: `LocalFileSystem`.

    The sample archive is copied to ``tmpdir`` through an ``OpenFile``, then
    opened via the "file" filesystem and wrapped in a tar filesystem.
    """

    filename = os.path.join(tmpdir, f'temp{recipe["suffix"]}')

    fs = fsspec.filesystem("file")
    f = OpenFile(fs, filename, mode="wb")

    with temptar(archive_data, mode=recipe["mode"], suffix=recipe["suffix"]) as tf:
        # Open the source in a context manager so its handle is closed
        # deterministically instead of being left to garbage collection.
        with f as fo, open(tf, "rb") as src:
            fo.write(src.read())

    # Verify that the tar archive has the correct compression.
    with open(filename, "rb") as raw:
        assert raw.read()[:10].startswith(recipe["magic"])

    # Verify content of a sample file.
    with fs.open(filename) as resource:
        tarfs = fsspec.filesystem("tar", fo=resource)
        assert tarfs.cat("b") == b"hello"
114
-
115
-
116
@pytest.mark.parametrize(
    "recipe",
    [
        {"mode": "w", "suffix": ".tar", "magic": b"a\x00\x00\x00\x00"},
        {"mode": "w:gz", "suffix": ".tar.gz", "magic": b"\x1f\x8b\x08\x08"},
        {"mode": "w:bz2", "suffix": ".tar.bz2", "magic": b"BZh91AY"},
        {"mode": "w:xz", "suffix": ".tar.xz", "magic": b"\xfd7zXZ\x00\x00"},
    ],
    ids=["tar", "tar-gz", "tar-bz2", "tar-xz"],
)
def test_filesystem_cached(recipe, tmpdir):
    """
    Run tests through a real, cached, fsspec filesystem implementation.
    Here: `TarFileSystem` over `WholeFileCacheFileSystem` over `LocalFileSystem`.

    Also checks that the cache directory is empty before use, holds two
    entries after reading, and is empty again after ``clear_cache()``.
    """

    filename = os.path.join(tmpdir, f'temp{recipe["suffix"]}')

    # Create a filesystem from test fixture.
    fs = fsspec.filesystem("file")
    f = OpenFile(fs, filename, mode="wb")

    with temptar(archive_data, mode=recipe["mode"], suffix=recipe["suffix"]) as tf:
        # Open the source in a context manager so its handle is closed
        # deterministically instead of being left to garbage collection.
        with f as fo, open(tf, "rb") as src:
            fo.write(src.read())

    # Verify that the tar archive has the correct compression.
    with open(filename, "rb") as raw:
        assert raw.read()[:10].startswith(recipe["magic"])

    # Access cached filesystem.
    cachedir = tempfile.mkdtemp()
    try:
        filesystem = WholeFileCacheFileSystem(fs=fs, cache_storage=cachedir)

        # Verify the cache is empty beforehand.
        assert os.listdir(cachedir) == []

        # Verify content of a sample file.
        with filesystem.open(filename) as resource:
            tarfs = fsspec.filesystem("tar", fo=resource)
            assert tarfs.cat("b") == b"hello"

        # Verify the cache is populated afterwards.
        assert len(os.listdir(cachedir)) == 2

        # Verify that the cache is empty after clearing it.
        filesystem.clear_cache()
        assert os.listdir(cachedir) == []

        filesystem.clear_cache()
    finally:
        # Remove the temporary cache directory even when an assertion fails.
        shutil.rmtree(cachedir, ignore_errors=True)
167
-
168
-
169
@pytest.mark.parametrize(
    "recipe",
    [
        {"mode": "w", "suffix": ".tar", "magic": b"a\x00\x00\x00\x00"},
        {"mode": "w:gz", "suffix": ".tar.gz", "magic": b"\x1f\x8b\x08\x08"},
        {"mode": "w:bz2", "suffix": ".tar.bz2", "magic": b"BZh91AY"},
        {"mode": "w:xz", "suffix": ".tar.xz", "magic": b"\xfd7zXZ\x00\x00"},
    ],
    ids=["tar", "tar-gz", "tar-bz2", "tar-xz"],
)
def test_url_to_fs_direct(recipe, tmpdir):
    """Resolve a chained ``tar://inner::file://`` URL and read a sample member."""
    tar_mode = recipe["mode"]
    tar_suffix = recipe["suffix"]
    with temptar(archive_data, mode=tar_mode, suffix=tar_suffix) as tar_path:
        chained_url = f"tar://inner::file://{tar_path}"
        tarfs, _inner_path = fsspec.core.url_to_fs(url=chained_url)
        assert tarfs.cat("b") == b"hello"
184
-
185
-
186
@pytest.mark.parametrize(
    "recipe",
    [
        {"mode": "w", "suffix": ".tar"},
        {"mode": "w:gz", "suffix": ".tar.gz"},
        {"mode": "w:bz2", "suffix": ".tar.bz2"},
        {"mode": "w:xz", "suffix": ".tar.xz"},
    ],
    ids=["tar", "tar-gz", "tar-bz2", "tar-xz"],
)
def test_url_to_fs_cached(recipe, tmpdir):
    """Resolve a ``tar://inner::simplecache::file://`` URL and read a sample member."""
    tar_mode = recipe["mode"]
    tar_suffix = recipe["suffix"]
    with temptar(archive_data, mode=tar_mode, suffix=tar_suffix) as tar_path:
        chained_url = f"tar://inner::simplecache::file://{tar_path}"
        # requires same_names in order to be able to guess compression from
        # filename
        cache_options = {"same_names": True}
        tarfs, _inner_path = fsspec.core.url_to_fs(
            url=chained_url, simplecache=cache_options
        )
        assert tarfs.cat("b") == b"hello"
203
-
204
-
205
@pytest.mark.parametrize(
    "compression", ["", "gz", "bz2", "xz"], ids=["tar", "tar-gz", "tar-bz2", "tar-xz"]
)
def test_ls_with_folders(compression: str, tmp_path: Path):
    """
    Write a tar file containing only file entries (no directory entries) and
    check that ``find(..., withdirs=True)`` still reports the intermediate
    folders, like the ZipFileSystem.
    """
    tar_data: dict[str, bytes] = {
        "a.pdf": b"Hello A!",
        "b/c.pdf": b"Hello C!",
        "d/e/f.pdf": b"Hello F!",
        "d/g.pdf": b"Hello G!",
    }
    suffix = f".tar.{compression}" if compression else ".tar"
    temp_archive_file = tmp_path / f"test_tar_file{suffix}"

    # We need to manually write the tarfile here, because temptar
    # creates intermediate directories which is not how tars are always created
    with open(temp_archive_file, "wb") as sink:
        with tarfile.open(fileobj=sink, mode=f"w:{compression}") as archive:
            for member_name, payload in tar_data.items():
                member = tarfile.TarInfo(name=member_name)
                member.size = len(payload)
                archive.addfile(member, BytesIO(payload))

    expected = ["a.pdf", "b", "b/c.pdf", "d", "d/e", "d/e/f.pdf", "d/g.pdf"]
    with open(temp_archive_file, "rb") as source:
        tarfs = TarFileSystem(source)
        assert tarfs.find("/", withdirs=True) == expected