fsspec 2024.5.0__py3-none-any.whl → 2024.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/_version.py +2 -2
- fsspec/caching.py +3 -2
- fsspec/compression.py +1 -1
- fsspec/generic.py +3 -0
- fsspec/implementations/cached.py +6 -16
- fsspec/implementations/dirfs.py +2 -0
- fsspec/implementations/github.py +12 -0
- fsspec/implementations/http.py +2 -1
- fsspec/implementations/reference.py +9 -0
- fsspec/implementations/smb.py +10 -0
- fsspec/json.py +121 -0
- fsspec/registry.py +24 -18
- fsspec/spec.py +119 -33
- fsspec/utils.py +1 -1
- {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/METADATA +10 -5
- fsspec-2024.6.1.dist-info/RECORD +55 -0
- {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/WHEEL +1 -1
- fsspec/implementations/tests/__init__.py +0 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +0 -112
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +0 -582
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +0 -873
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +0 -458
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +0 -1355
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +0 -795
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +0 -613
- fsspec/implementations/tests/conftest.py +0 -39
- fsspec/implementations/tests/local/__init__.py +0 -0
- fsspec/implementations/tests/local/local_fixtures.py +0 -18
- fsspec/implementations/tests/local/local_test.py +0 -14
- fsspec/implementations/tests/memory/__init__.py +0 -0
- fsspec/implementations/tests/memory/memory_fixtures.py +0 -27
- fsspec/implementations/tests/memory/memory_test.py +0 -14
- fsspec/implementations/tests/out.zip +0 -0
- fsspec/implementations/tests/test_archive.py +0 -382
- fsspec/implementations/tests/test_arrow.py +0 -259
- fsspec/implementations/tests/test_cached.py +0 -1306
- fsspec/implementations/tests/test_common.py +0 -35
- fsspec/implementations/tests/test_dask.py +0 -29
- fsspec/implementations/tests/test_data.py +0 -20
- fsspec/implementations/tests/test_dbfs.py +0 -268
- fsspec/implementations/tests/test_dirfs.py +0 -588
- fsspec/implementations/tests/test_ftp.py +0 -178
- fsspec/implementations/tests/test_git.py +0 -76
- fsspec/implementations/tests/test_http.py +0 -577
- fsspec/implementations/tests/test_jupyter.py +0 -57
- fsspec/implementations/tests/test_libarchive.py +0 -33
- fsspec/implementations/tests/test_local.py +0 -1285
- fsspec/implementations/tests/test_memory.py +0 -382
- fsspec/implementations/tests/test_reference.py +0 -720
- fsspec/implementations/tests/test_sftp.py +0 -233
- fsspec/implementations/tests/test_smb.py +0 -139
- fsspec/implementations/tests/test_tar.py +0 -243
- fsspec/implementations/tests/test_webhdfs.py +0 -197
- fsspec/implementations/tests/test_zip.py +0 -134
- fsspec/tests/__init__.py +0 -0
- fsspec/tests/conftest.py +0 -188
- fsspec/tests/data/listing.html +0 -1
- fsspec/tests/test_api.py +0 -498
- fsspec/tests/test_async.py +0 -230
- fsspec/tests/test_caches.py +0 -255
- fsspec/tests/test_callbacks.py +0 -89
- fsspec/tests/test_compression.py +0 -164
- fsspec/tests/test_config.py +0 -129
- fsspec/tests/test_core.py +0 -466
- fsspec/tests/test_downstream.py +0 -40
- fsspec/tests/test_file.py +0 -200
- fsspec/tests/test_fuse.py +0 -147
- fsspec/tests/test_generic.py +0 -90
- fsspec/tests/test_gui.py +0 -23
- fsspec/tests/test_mapping.py +0 -228
- fsspec/tests/test_parquet.py +0 -140
- fsspec/tests/test_registry.py +0 -134
- fsspec/tests/test_spec.py +0 -1167
- fsspec/tests/test_utils.py +0 -478
- fsspec-2024.5.0.dist-info/RECORD +0 -111
- {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,197 +0,0 @@
|
|
|
1
|
-
import pickle
|
|
2
|
-
import shlex
|
|
3
|
-
import subprocess
|
|
4
|
-
import time
|
|
5
|
-
|
|
6
|
-
import pytest
|
|
7
|
-
|
|
8
|
-
import fsspec
|
|
9
|
-
|
|
10
|
-
requests = pytest.importorskip("requests")
|
|
11
|
-
|
|
12
|
-
from fsspec.implementations.webhdfs import WebHDFS # noqa: E402
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
@pytest.fixture(scope="module")
|
|
16
|
-
def hdfs_cluster():
|
|
17
|
-
cmd0 = shlex.split("htcluster shutdown")
|
|
18
|
-
try:
|
|
19
|
-
subprocess.check_output(cmd0, stderr=subprocess.STDOUT)
|
|
20
|
-
except FileNotFoundError:
|
|
21
|
-
pytest.skip("htcluster not found")
|
|
22
|
-
except subprocess.CalledProcessError as ex:
|
|
23
|
-
pytest.skip(f"htcluster failed: {ex.output.decode()}")
|
|
24
|
-
cmd1 = shlex.split("htcluster startup --image base")
|
|
25
|
-
subprocess.check_output(cmd1)
|
|
26
|
-
try:
|
|
27
|
-
while True:
|
|
28
|
-
t = 90
|
|
29
|
-
try:
|
|
30
|
-
requests.get("http://localhost:50070/webhdfs/v1/?op=LISTSTATUS")
|
|
31
|
-
except: # noqa: E722
|
|
32
|
-
t -= 1
|
|
33
|
-
assert t > 0, "Timeout waiting for HDFS"
|
|
34
|
-
time.sleep(1)
|
|
35
|
-
continue
|
|
36
|
-
break
|
|
37
|
-
time.sleep(7)
|
|
38
|
-
yield "localhost"
|
|
39
|
-
finally:
|
|
40
|
-
subprocess.check_output(cmd0)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def test_pickle(hdfs_cluster):
|
|
44
|
-
w = WebHDFS(hdfs_cluster, user="testuser")
|
|
45
|
-
w2 = pickle.loads(pickle.dumps(w))
|
|
46
|
-
assert w == w2
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def test_simple(hdfs_cluster):
|
|
50
|
-
w = WebHDFS(hdfs_cluster, user="testuser")
|
|
51
|
-
home = w.home_directory()
|
|
52
|
-
assert home == "/user/testuser"
|
|
53
|
-
with pytest.raises(PermissionError):
|
|
54
|
-
w.mkdir("/root")
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def test_url(hdfs_cluster):
|
|
58
|
-
url = "webhdfs://testuser@localhost:50070/user/testuser/myfile"
|
|
59
|
-
fo = fsspec.open(url, "wb", data_proxy={"worker.example.com": "localhost"})
|
|
60
|
-
with fo as f:
|
|
61
|
-
f.write(b"hello")
|
|
62
|
-
fo = fsspec.open(url, "rb", data_proxy={"worker.example.com": "localhost"})
|
|
63
|
-
with fo as f:
|
|
64
|
-
assert f.read() == b"hello"
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def test_workflow(hdfs_cluster):
|
|
68
|
-
w = WebHDFS(
|
|
69
|
-
hdfs_cluster, user="testuser", data_proxy={"worker.example.com": "localhost"}
|
|
70
|
-
)
|
|
71
|
-
fn = "/user/testuser/testrun/afile"
|
|
72
|
-
w.mkdir("/user/testuser/testrun")
|
|
73
|
-
with w.open(fn, "wb") as f:
|
|
74
|
-
f.write(b"hello")
|
|
75
|
-
assert w.exists(fn)
|
|
76
|
-
info = w.info(fn)
|
|
77
|
-
assert info["size"] == 5
|
|
78
|
-
assert w.isfile(fn)
|
|
79
|
-
assert w.cat(fn) == b"hello"
|
|
80
|
-
w.rm("/user/testuser/testrun", recursive=True)
|
|
81
|
-
assert not w.exists(fn)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
def test_with_gzip(hdfs_cluster):
|
|
85
|
-
from gzip import GzipFile
|
|
86
|
-
|
|
87
|
-
w = WebHDFS(
|
|
88
|
-
hdfs_cluster, user="testuser", data_proxy={"worker.example.com": "localhost"}
|
|
89
|
-
)
|
|
90
|
-
fn = "/user/testuser/gzfile"
|
|
91
|
-
with w.open(fn, "wb") as f:
|
|
92
|
-
gf = GzipFile(fileobj=f, mode="w")
|
|
93
|
-
gf.write(b"hello")
|
|
94
|
-
gf.close()
|
|
95
|
-
with w.open(fn, "rb") as f:
|
|
96
|
-
gf = GzipFile(fileobj=f, mode="r")
|
|
97
|
-
assert gf.read() == b"hello"
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def test_workflow_transaction(hdfs_cluster):
|
|
101
|
-
w = WebHDFS(
|
|
102
|
-
hdfs_cluster, user="testuser", data_proxy={"worker.example.com": "localhost"}
|
|
103
|
-
)
|
|
104
|
-
fn = "/user/testuser/testrun/afile"
|
|
105
|
-
w.mkdirs("/user/testuser/testrun")
|
|
106
|
-
with w.transaction:
|
|
107
|
-
with w.open(fn, "wb") as f:
|
|
108
|
-
f.write(b"hello")
|
|
109
|
-
assert not w.exists(fn)
|
|
110
|
-
assert w.exists(fn)
|
|
111
|
-
assert w.ukey(fn)
|
|
112
|
-
files = w.ls("/user/testuser/testrun", True)
|
|
113
|
-
summ = w.content_summary("/user/testuser/testrun")
|
|
114
|
-
assert summ["length"] == files[0]["size"]
|
|
115
|
-
assert summ["fileCount"] == 1
|
|
116
|
-
|
|
117
|
-
w.rm("/user/testuser/testrun", recursive=True)
|
|
118
|
-
assert not w.exists(fn)
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
def test_webhdfs_cp_file(hdfs_cluster):
|
|
122
|
-
fs = WebHDFS(
|
|
123
|
-
hdfs_cluster, user="testuser", data_proxy={"worker.example.com": "localhost"}
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
src, dst = "/user/testuser/testrun/f1", "/user/testuser/testrun/f2"
|
|
127
|
-
|
|
128
|
-
fs.mkdir("/user/testuser/testrun")
|
|
129
|
-
|
|
130
|
-
with fs.open(src, "wb") as f:
|
|
131
|
-
f.write(b"hello")
|
|
132
|
-
|
|
133
|
-
fs.cp_file(src, dst)
|
|
134
|
-
|
|
135
|
-
assert fs.exists(src)
|
|
136
|
-
assert fs.exists(dst)
|
|
137
|
-
assert fs.cat(src) == fs.cat(dst)
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def test_path_with_equals(hdfs_cluster):
|
|
141
|
-
fs = WebHDFS(
|
|
142
|
-
hdfs_cluster, user="testuser", data_proxy={"worker.example.com": "localhost"}
|
|
143
|
-
)
|
|
144
|
-
path_with_equals = "/user/testuser/some_table/datestamp=2023-11-11"
|
|
145
|
-
|
|
146
|
-
fs.mkdir(path_with_equals)
|
|
147
|
-
|
|
148
|
-
result = fs.ls(path_with_equals)
|
|
149
|
-
assert result is not None
|
|
150
|
-
assert fs.exists(path_with_equals)
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
def test_error_handling_with_equals_in_path(hdfs_cluster):
|
|
154
|
-
fs = WebHDFS(hdfs_cluster, user="testuser")
|
|
155
|
-
invalid_path_with_equals = (
|
|
156
|
-
"/user/testuser/some_table/invalid_path=datestamp=2023-11-11"
|
|
157
|
-
)
|
|
158
|
-
|
|
159
|
-
with pytest.raises(FileNotFoundError):
|
|
160
|
-
fs.ls(invalid_path_with_equals)
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
def test_create_and_touch_file_with_equals(hdfs_cluster):
|
|
164
|
-
fs = WebHDFS(
|
|
165
|
-
hdfs_cluster,
|
|
166
|
-
user="testuser",
|
|
167
|
-
data_proxy={"worker.example.com": "localhost"},
|
|
168
|
-
)
|
|
169
|
-
base_path = "/user/testuser/some_table/datestamp=2023-11-11"
|
|
170
|
-
file_path = f"{base_path}/testfile.txt"
|
|
171
|
-
|
|
172
|
-
fs.mkdir(base_path)
|
|
173
|
-
fs.touch(file_path, "wb")
|
|
174
|
-
assert fs.exists(file_path)
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
def test_write_read_verify_file_with_equals(hdfs_cluster):
|
|
178
|
-
fs = WebHDFS(
|
|
179
|
-
hdfs_cluster,
|
|
180
|
-
user="testuser",
|
|
181
|
-
data_proxy={"worker.example.com": "localhost"},
|
|
182
|
-
)
|
|
183
|
-
base_path = "/user/testuser/some_table/datestamp=2023-11-11"
|
|
184
|
-
file_path = f"{base_path}/testfile.txt"
|
|
185
|
-
content = b"This is some content!"
|
|
186
|
-
|
|
187
|
-
fs.mkdir(base_path)
|
|
188
|
-
with fs.open(file_path, "wb") as f:
|
|
189
|
-
f.write(content)
|
|
190
|
-
|
|
191
|
-
with fs.open(file_path, "rb") as f:
|
|
192
|
-
assert f.read() == content
|
|
193
|
-
|
|
194
|
-
file_info = fs.ls(base_path, detail=True)
|
|
195
|
-
assert len(file_info) == 1
|
|
196
|
-
assert file_info[0]["name"] == file_path
|
|
197
|
-
assert file_info[0]["size"] == len(content)
|
|
@@ -1,134 +0,0 @@
|
|
|
1
|
-
import collections.abc
|
|
2
|
-
import os.path
|
|
3
|
-
|
|
4
|
-
import pytest
|
|
5
|
-
|
|
6
|
-
import fsspec
|
|
7
|
-
from fsspec.implementations.tests.test_archive import archive_data, tempzip
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def test_info():
|
|
11
|
-
with tempzip(archive_data) as z:
|
|
12
|
-
fs = fsspec.filesystem("zip", fo=z)
|
|
13
|
-
|
|
14
|
-
# Iterate over all files.
|
|
15
|
-
for f in archive_data:
|
|
16
|
-
lhs = fs.info(f)
|
|
17
|
-
|
|
18
|
-
# Probe some specific fields of Zip archives.
|
|
19
|
-
assert "CRC" in lhs
|
|
20
|
-
assert "compress_size" in lhs
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def test_fsspec_get_mapper():
|
|
24
|
-
"""Added for #788"""
|
|
25
|
-
|
|
26
|
-
with tempzip(archive_data) as z:
|
|
27
|
-
mapping = fsspec.get_mapper(f"zip::{z}")
|
|
28
|
-
|
|
29
|
-
assert isinstance(mapping, collections.abc.Mapping)
|
|
30
|
-
keys = sorted(mapping.keys())
|
|
31
|
-
assert keys == ["a", "b", "deeply/nested/path"]
|
|
32
|
-
|
|
33
|
-
# mapping.getitems() will call FSMap.fs.cat()
|
|
34
|
-
# which was not accurately implemented for zip.
|
|
35
|
-
assert isinstance(mapping, fsspec.mapping.FSMap)
|
|
36
|
-
items = dict(mapping.getitems(keys))
|
|
37
|
-
assert items == {"a": b"", "b": b"hello", "deeply/nested/path": b"stuff"}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def test_not_cached():
|
|
41
|
-
with tempzip(archive_data) as z:
|
|
42
|
-
fs = fsspec.filesystem("zip", fo=z)
|
|
43
|
-
fs2 = fsspec.filesystem("zip", fo=z)
|
|
44
|
-
assert fs is not fs2
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def test_root_info():
|
|
48
|
-
with tempzip(archive_data) as z:
|
|
49
|
-
fs = fsspec.filesystem("zip", fo=z)
|
|
50
|
-
assert fs.info("/") == {"name": "", "type": "directory", "size": 0}
|
|
51
|
-
assert fs.info("") == {"name": "", "type": "directory", "size": 0}
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def test_write_seek(m):
|
|
55
|
-
with m.open("afile.zip", "wb") as f:
|
|
56
|
-
fs = fsspec.filesystem("zip", fo=f, mode="w")
|
|
57
|
-
fs.pipe("another", b"hi")
|
|
58
|
-
fs.zip.close()
|
|
59
|
-
|
|
60
|
-
with m.open("afile.zip", "rb") as f:
|
|
61
|
-
fs = fsspec.filesystem("zip", fo=f)
|
|
62
|
-
assert fs.cat("another") == b"hi"
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
def test_rw(m):
|
|
66
|
-
# extra arg to zip means "create archive"
|
|
67
|
-
with fsspec.open(
|
|
68
|
-
"zip://afile::memory://out.zip", mode="wb", zip={"mode": "w"}
|
|
69
|
-
) as f:
|
|
70
|
-
f.write(b"data")
|
|
71
|
-
|
|
72
|
-
with fsspec.open("zip://afile::memory://out.zip", mode="rb") as f:
|
|
73
|
-
assert f.read() == b"data"
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def test_mapper(m):
|
|
77
|
-
# extra arg to zip means "create archive"
|
|
78
|
-
mapper = fsspec.get_mapper("zip::memory://out.zip", zip={"mode": "w"})
|
|
79
|
-
with pytest.raises(KeyError):
|
|
80
|
-
mapper["a"]
|
|
81
|
-
|
|
82
|
-
mapper["a"] = b"data"
|
|
83
|
-
with pytest.raises(OSError):
|
|
84
|
-
# fails because this is write mode and we cannot also read
|
|
85
|
-
mapper["a"]
|
|
86
|
-
assert "a" in mapper # but be can list
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def test_zip_glob_star(m):
|
|
90
|
-
with fsspec.open(
|
|
91
|
-
"zip://adir/afile::memory://out.zip", mode="wb", zip={"mode": "w"}
|
|
92
|
-
) as f:
|
|
93
|
-
f.write(b"data")
|
|
94
|
-
|
|
95
|
-
fs, _ = fsspec.core.url_to_fs("zip::memory://out.zip")
|
|
96
|
-
outfiles = fs.glob("*")
|
|
97
|
-
assert len(outfiles) == 1
|
|
98
|
-
|
|
99
|
-
fs = fsspec.filesystem("zip", fo="memory://out.zip", mode="w")
|
|
100
|
-
fs.mkdir("adir")
|
|
101
|
-
fs.pipe("adir/afile", b"data")
|
|
102
|
-
outfiles = fs.glob("*")
|
|
103
|
-
assert len(outfiles) == 1
|
|
104
|
-
|
|
105
|
-
fn = f"{os.path.dirname(os.path.abspath((__file__)))}/out.zip"
|
|
106
|
-
fs = fsspec.filesystem("zip", fo=fn, mode="r")
|
|
107
|
-
outfiles = fs.glob("*")
|
|
108
|
-
assert len(outfiles) == 1
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
def test_append(m, tmpdir):
|
|
112
|
-
fs = fsspec.filesystem("zip", fo="memory://out.zip", mode="w")
|
|
113
|
-
with fs.open("afile", "wb") as f:
|
|
114
|
-
f.write(b"data")
|
|
115
|
-
fs.close()
|
|
116
|
-
|
|
117
|
-
fs = fsspec.filesystem("zip", fo="memory://out.zip", mode="a")
|
|
118
|
-
with fs.open("bfile", "wb") as f:
|
|
119
|
-
f.write(b"data")
|
|
120
|
-
fs.close()
|
|
121
|
-
|
|
122
|
-
assert len(fsspec.open_files("zip://*::memory://out.zip")) == 2
|
|
123
|
-
|
|
124
|
-
fs = fsspec.filesystem("zip", fo=f"{tmpdir}/out.zip", mode="w")
|
|
125
|
-
with fs.open("afile", "wb") as f:
|
|
126
|
-
f.write(b"data")
|
|
127
|
-
fs.close()
|
|
128
|
-
|
|
129
|
-
fs = fsspec.filesystem("zip", fo=f"{tmpdir}/out.zip", mode="a")
|
|
130
|
-
with fs.open("bfile", "wb") as f:
|
|
131
|
-
f.write(b"data")
|
|
132
|
-
fs.close()
|
|
133
|
-
|
|
134
|
-
assert len(fsspec.open_files("zip://*::memory://out.zip")) == 2
|
fsspec/tests/__init__.py
DELETED
|
File without changes
|
fsspec/tests/conftest.py
DELETED
|
@@ -1,188 +0,0 @@
|
|
|
1
|
-
import contextlib
|
|
2
|
-
import gzip
|
|
3
|
-
import json
|
|
4
|
-
import os
|
|
5
|
-
import threading
|
|
6
|
-
from collections import ChainMap
|
|
7
|
-
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
8
|
-
|
|
9
|
-
import pytest
|
|
10
|
-
|
|
11
|
-
requests = pytest.importorskip("requests")
|
|
12
|
-
port = 9898
|
|
13
|
-
data = b"\n".join([b"some test data"] * 1000)
|
|
14
|
-
realfile = f"http://127.0.0.1:{port}/index/realfile"
|
|
15
|
-
index = b'<a href="%s">Link</a>' % realfile.encode()
|
|
16
|
-
listing = open(
|
|
17
|
-
os.path.join(os.path.dirname(__file__), "data", "listing.html"), "rb"
|
|
18
|
-
).read()
|
|
19
|
-
win = os.name == "nt"
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def _make_listing(*paths):
|
|
23
|
-
return "\n".join(
|
|
24
|
-
f'<a href="http://127.0.0.1:{port}{f}">Link_{i}</a>'
|
|
25
|
-
for i, f in enumerate(paths)
|
|
26
|
-
).encode()
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
@pytest.fixture
|
|
30
|
-
def reset_files():
|
|
31
|
-
yield
|
|
32
|
-
|
|
33
|
-
# Reset the newly added files after the
|
|
34
|
-
# test is completed.
|
|
35
|
-
HTTPTestHandler.dynamic_files.clear()
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class HTTPTestHandler(BaseHTTPRequestHandler):
|
|
39
|
-
static_files = {
|
|
40
|
-
"/index/realfile": data,
|
|
41
|
-
"/index/otherfile": data,
|
|
42
|
-
"/index": index,
|
|
43
|
-
"/data/20020401": listing,
|
|
44
|
-
"/simple/": _make_listing("/simple/file", "/simple/dir/"),
|
|
45
|
-
"/simple/file": data,
|
|
46
|
-
"/simple/dir/": _make_listing("/simple/dir/file"),
|
|
47
|
-
"/simple/dir/file": data,
|
|
48
|
-
}
|
|
49
|
-
dynamic_files = {}
|
|
50
|
-
|
|
51
|
-
files = ChainMap(dynamic_files, static_files)
|
|
52
|
-
|
|
53
|
-
def __init__(self, *args, **kwargs):
|
|
54
|
-
super().__init__(*args, **kwargs)
|
|
55
|
-
|
|
56
|
-
def _respond(self, code=200, headers=None, data=b""):
|
|
57
|
-
headers = headers or {}
|
|
58
|
-
headers.update({"User-Agent": "test"})
|
|
59
|
-
self.send_response(code)
|
|
60
|
-
for k, v in headers.items():
|
|
61
|
-
self.send_header(k, str(v))
|
|
62
|
-
self.end_headers()
|
|
63
|
-
if data:
|
|
64
|
-
self.wfile.write(data)
|
|
65
|
-
|
|
66
|
-
def do_GET(self):
|
|
67
|
-
file_path = self.path
|
|
68
|
-
if file_path.endswith("/") and file_path.rstrip("/") in self.files:
|
|
69
|
-
file_path = file_path.rstrip("/")
|
|
70
|
-
file_data = self.files.get(file_path)
|
|
71
|
-
if "give_path" in self.headers:
|
|
72
|
-
return self._respond(200, data=json.dumps({"path": self.path}).encode())
|
|
73
|
-
if "redirect" in self.headers and file_path != "/index/realfile":
|
|
74
|
-
new_url = f"http://127.0.0.1:{port}/index/realfile"
|
|
75
|
-
return self._respond(301, {"Location": new_url})
|
|
76
|
-
if file_data is None:
|
|
77
|
-
return self._respond(404)
|
|
78
|
-
|
|
79
|
-
status = 200
|
|
80
|
-
content_range = f"bytes 0-{len(file_data) - 1}/{len(file_data)}"
|
|
81
|
-
if ("Range" in self.headers) and ("ignore_range" not in self.headers):
|
|
82
|
-
ran = self.headers["Range"]
|
|
83
|
-
b, ran = ran.split("=")
|
|
84
|
-
start, end = ran.split("-")
|
|
85
|
-
if start:
|
|
86
|
-
content_range = f"bytes {start}-{end}/{len(file_data)}"
|
|
87
|
-
file_data = file_data[int(start) : (int(end) + 1) if end else None]
|
|
88
|
-
else:
|
|
89
|
-
# suffix only
|
|
90
|
-
l = len(file_data)
|
|
91
|
-
content_range = f"bytes {l - int(end)}-{l - 1}/{l}"
|
|
92
|
-
file_data = file_data[-int(end) :]
|
|
93
|
-
if "use_206" in self.headers:
|
|
94
|
-
status = 206
|
|
95
|
-
if "give_length" in self.headers:
|
|
96
|
-
if "gzip_encoding" in self.headers:
|
|
97
|
-
file_data = gzip.compress(file_data)
|
|
98
|
-
response_headers = {
|
|
99
|
-
"Content-Length": len(file_data),
|
|
100
|
-
"Content-Encoding": "gzip",
|
|
101
|
-
}
|
|
102
|
-
else:
|
|
103
|
-
response_headers = {"Content-Length": len(file_data)}
|
|
104
|
-
self._respond(status, response_headers, file_data)
|
|
105
|
-
elif "give_range" in self.headers:
|
|
106
|
-
self._respond(status, {"Content-Range": content_range}, file_data)
|
|
107
|
-
elif "give_mimetype" in self.headers:
|
|
108
|
-
self._respond(
|
|
109
|
-
status, {"Content-Type": "text/html; charset=utf-8"}, file_data
|
|
110
|
-
)
|
|
111
|
-
else:
|
|
112
|
-
self._respond(status, data=file_data)
|
|
113
|
-
|
|
114
|
-
def do_POST(self):
|
|
115
|
-
length = self.headers.get("Content-Length")
|
|
116
|
-
file_path = self.path.rstrip("/")
|
|
117
|
-
if length is None:
|
|
118
|
-
assert self.headers.get("Transfer-Encoding") == "chunked"
|
|
119
|
-
self.files[file_path] = b"".join(self.read_chunks())
|
|
120
|
-
else:
|
|
121
|
-
self.files[file_path] = self.rfile.read(length)
|
|
122
|
-
self._respond(200)
|
|
123
|
-
|
|
124
|
-
do_PUT = do_POST
|
|
125
|
-
|
|
126
|
-
def read_chunks(self):
|
|
127
|
-
length = -1
|
|
128
|
-
while length != 0:
|
|
129
|
-
line = self.rfile.readline().strip()
|
|
130
|
-
if len(line) == 0:
|
|
131
|
-
length = 0
|
|
132
|
-
else:
|
|
133
|
-
length = int(line, 16)
|
|
134
|
-
yield self.rfile.read(length)
|
|
135
|
-
self.rfile.readline()
|
|
136
|
-
|
|
137
|
-
def do_HEAD(self):
|
|
138
|
-
if "head_not_auth" in self.headers:
|
|
139
|
-
return self._respond(
|
|
140
|
-
403, {"Content-Length": 123}, b"not authorized for HEAD request"
|
|
141
|
-
)
|
|
142
|
-
elif "head_ok" not in self.headers:
|
|
143
|
-
return self._respond(405)
|
|
144
|
-
|
|
145
|
-
file_path = self.path.rstrip("/")
|
|
146
|
-
file_data = self.files.get(file_path)
|
|
147
|
-
if file_data is None:
|
|
148
|
-
return self._respond(404)
|
|
149
|
-
|
|
150
|
-
if ("give_length" in self.headers) or ("head_give_length" in self.headers):
|
|
151
|
-
response_headers = {"Content-Length": len(file_data)}
|
|
152
|
-
if "zero_length" in self.headers:
|
|
153
|
-
response_headers["Content-Length"] = 0
|
|
154
|
-
elif "gzip_encoding" in self.headers:
|
|
155
|
-
file_data = gzip.compress(file_data)
|
|
156
|
-
response_headers["Content-Encoding"] = "gzip"
|
|
157
|
-
response_headers["Content-Length"] = len(file_data)
|
|
158
|
-
|
|
159
|
-
self._respond(200, response_headers)
|
|
160
|
-
elif "give_range" in self.headers:
|
|
161
|
-
self._respond(
|
|
162
|
-
200, {"Content-Range": f"0-{len(file_data) - 1}/{len(file_data)}"}
|
|
163
|
-
)
|
|
164
|
-
elif "give_etag" in self.headers:
|
|
165
|
-
self._respond(200, {"ETag": "xxx"})
|
|
166
|
-
else:
|
|
167
|
-
self._respond(200) # OK response, but no useful info
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
@contextlib.contextmanager
|
|
171
|
-
def serve():
|
|
172
|
-
server_address = ("", port)
|
|
173
|
-
httpd = HTTPServer(server_address, HTTPTestHandler)
|
|
174
|
-
th = threading.Thread(target=httpd.serve_forever)
|
|
175
|
-
th.daemon = True
|
|
176
|
-
th.start()
|
|
177
|
-
try:
|
|
178
|
-
yield f"http://127.0.0.1:{port}"
|
|
179
|
-
finally:
|
|
180
|
-
httpd.socket.close()
|
|
181
|
-
httpd.shutdown()
|
|
182
|
-
th.join()
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
@pytest.fixture(scope="module")
|
|
186
|
-
def server():
|
|
187
|
-
with serve() as s:
|
|
188
|
-
yield s
|
fsspec/tests/data/listing.html
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
\n<html><head><title>nasagrace.unl.edu - /data/20020401/</title></head><body><H1>nasagrace.unl.edu - /data/20020401/</H1><hr>\n\n<pre><A HREF="/data/">[To Parent Directory]</A><br><br> 1/27/2020 9:54 AM 1194073 <A HREF="/data/20020401/GRACE_GWS_20020401.pdf">GRACE_GWS_20020401.pdf</A><br> 1/27/2020 9:54 AM 380043 <A HREF="/data/20020401/GRACE_GWS_20020401.png">GRACE_GWS_20020401.png</A><br> 1/27/2020 9:54 AM 1192987 <A HREF="/data/20020401/GRACE_RTZSM_20020401.pdf">GRACE_RTZSM_20020401.pdf</A><br> 1/27/2020 9:54 AM 384342 <A HREF="/data/20020401/GRACE_RTZSM_20020401.png">GRACE_RTZSM_20020401.png</A><br> 1/27/2020 9:55 AM 1202046 <A HREF="/data/20020401/GRACE_SFSM_20020401.pdf">GRACE_SFSM_20020401.pdf</A><br> 1/27/2020 9:55 AM 387932 <A HREF="/data/20020401/GRACE_SFSM_20020401.png">GRACE_SFSM_20020401.png</A><br> 1/27/2020 9:54 AM 4975980 <A HREF="/data/20020401/GRACEDADM_CLSM0125US_7D.A20020401.030.nc4">GRACEDADM_CLSM0125US_7D.A20020401.030.nc4</A><br> 1/27/2020 9:54 AM 345640 <A HREF="/data/20020401/gws_perc_0125deg_US_20020401.img">gws_perc_0125deg_US_20020401.img</A><br> 1/27/2020 9:54 AM 2272 <A HREF="/data/20020401/gws_perc_0125deg_US_20020401.img.aux.xml">gws_perc_0125deg_US_20020401.img.aux.xml</A><br> 1/27/2020 9:54 AM 5678 <A HREF="/data/20020401/gws_perc_0125deg_US_20020401.img.xml">gws_perc_0125deg_US_20020401.img.xml</A><br> 1/27/2020 9:54 AM 136081 <A HREF="/data/20020401/gws_perc_0125deg_US_20020401.rrd">gws_perc_0125deg_US_20020401.rrd</A><br> 1/27/2020 9:54 AM 83
|