fsspec 2024.3.1__py3-none-any.whl → 2024.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/__init__.py +2 -3
- fsspec/_version.py +14 -19
- fsspec/caching.py +83 -14
- fsspec/compression.py +1 -0
- fsspec/core.py +32 -8
- fsspec/exceptions.py +1 -0
- fsspec/generic.py +1 -1
- fsspec/gui.py +1 -1
- fsspec/implementations/arrow.py +0 -2
- fsspec/implementations/cache_mapper.py +1 -2
- fsspec/implementations/cache_metadata.py +7 -7
- fsspec/implementations/dirfs.py +2 -2
- fsspec/implementations/http.py +9 -9
- fsspec/implementations/local.py +78 -45
- fsspec/implementations/memory.py +9 -0
- fsspec/implementations/smb.py +3 -1
- fsspec/implementations/tests/__init__.py +0 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +112 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +582 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +873 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +458 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +1355 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +795 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +613 -0
- fsspec/implementations/tests/conftest.py +39 -0
- fsspec/implementations/tests/local/__init__.py +0 -0
- fsspec/implementations/tests/local/local_fixtures.py +18 -0
- fsspec/implementations/tests/local/local_test.py +14 -0
- fsspec/implementations/tests/memory/__init__.py +0 -0
- fsspec/implementations/tests/memory/memory_fixtures.py +27 -0
- fsspec/implementations/tests/memory/memory_test.py +14 -0
- fsspec/implementations/tests/out.zip +0 -0
- fsspec/implementations/tests/test_archive.py +382 -0
- fsspec/implementations/tests/test_arrow.py +259 -0
- fsspec/implementations/tests/test_cached.py +1306 -0
- fsspec/implementations/tests/test_common.py +35 -0
- fsspec/implementations/tests/test_dask.py +29 -0
- fsspec/implementations/tests/test_data.py +20 -0
- fsspec/implementations/tests/test_dbfs.py +268 -0
- fsspec/implementations/tests/test_dirfs.py +588 -0
- fsspec/implementations/tests/test_ftp.py +178 -0
- fsspec/implementations/tests/test_git.py +76 -0
- fsspec/implementations/tests/test_http.py +577 -0
- fsspec/implementations/tests/test_jupyter.py +57 -0
- fsspec/implementations/tests/test_libarchive.py +33 -0
- fsspec/implementations/tests/test_local.py +1285 -0
- fsspec/implementations/tests/test_memory.py +382 -0
- fsspec/implementations/tests/test_reference.py +720 -0
- fsspec/implementations/tests/test_sftp.py +233 -0
- fsspec/implementations/tests/test_smb.py +139 -0
- fsspec/implementations/tests/test_tar.py +243 -0
- fsspec/implementations/tests/test_webhdfs.py +197 -0
- fsspec/implementations/tests/test_zip.py +134 -0
- fsspec/implementations/webhdfs.py +1 -3
- fsspec/parquet.py +0 -8
- fsspec/registry.py +4 -0
- fsspec/spec.py +21 -4
- fsspec/tests/__init__.py +0 -0
- fsspec/tests/abstract/mv.py +57 -0
- fsspec/tests/conftest.py +188 -0
- fsspec/tests/data/listing.html +1 -0
- fsspec/tests/test_api.py +498 -0
- fsspec/tests/test_async.py +230 -0
- fsspec/tests/test_caches.py +255 -0
- fsspec/tests/test_callbacks.py +89 -0
- fsspec/tests/test_compression.py +164 -0
- fsspec/tests/test_config.py +129 -0
- fsspec/tests/test_core.py +466 -0
- fsspec/tests/test_downstream.py +40 -0
- fsspec/tests/test_file.py +200 -0
- fsspec/tests/test_fuse.py +147 -0
- fsspec/tests/test_generic.py +90 -0
- fsspec/tests/test_gui.py +23 -0
- fsspec/tests/test_mapping.py +228 -0
- fsspec/tests/test_parquet.py +140 -0
- fsspec/tests/test_registry.py +134 -0
- fsspec/tests/test_spec.py +1167 -0
- fsspec/tests/test_utils.py +478 -0
- fsspec/utils.py +0 -2
- fsspec-2024.5.0.dist-info/METADATA +273 -0
- fsspec-2024.5.0.dist-info/RECORD +111 -0
- {fsspec-2024.3.1.dist-info → fsspec-2024.5.0.dist-info}/WHEEL +1 -2
- fsspec-2024.3.1.dist-info/METADATA +0 -167
- fsspec-2024.3.1.dist-info/RECORD +0 -54
- fsspec-2024.3.1.dist-info/top_level.txt +0 -1
- {fsspec-2024.3.1.dist-info → fsspec-2024.5.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,577 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import io
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
|
|
8
|
+
import aiohttp
|
|
9
|
+
import pytest
|
|
10
|
+
|
|
11
|
+
import fsspec.asyn
|
|
12
|
+
import fsspec.utils
|
|
13
|
+
from fsspec.implementations.http import HTTPStreamFile
|
|
14
|
+
from fsspec.tests.conftest import data, reset_files, server, win # noqa: F401
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_list(server):
    """Globbing ``/index/*`` on the test server yields only ``realfile``."""
    fs = fsspec.filesystem("http")
    expected = [server + "/index/realfile"]
    assert fs.glob(server + "/index/*") == expected
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_list_invalid_args(server):
    """Creating and using a filesystem with ``use_foobar=True`` raises TypeError."""
    pattern = server + "/index/*"
    with pytest.raises(TypeError):
        fsspec.filesystem("http", use_foobar=True).glob(pattern)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_list_cache(server):
    """Globbing returns the expected listing with ``use_listings_cache=True``."""
    fs = fsspec.filesystem("http", use_listings_cache=True)
    expected = [server + "/index/realfile"]
    assert fs.glob(server + "/index/*") == expected
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_list_cache_with_expiry_time_cached(server):
    """With a 30 second expiry, one glob fills ``dircache`` and a second glob
    returns the same listing."""
    fs = fsspec.filesystem("http", use_listings_cache=True, listings_expiry_time=30)
    pattern = server + "/index/*"
    expected = [server + "/index/realfile"]

    # Nothing has been listed yet, so the cache starts out empty.
    assert not fs.dircache

    # The first listing populates the cache as a side effect.
    assert fs.glob(pattern) == expected

    # Exactly one directory entry is now cached.
    assert len(fs.dircache) == 1

    # Repeating the glob gives the same answer.
    assert fs.glob(pattern) == expected
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_list_cache_with_expiry_time_purged(server):
    """With a 0.3 second expiry, a cached listing disappears after the TTL and
    is re-created by the next glob."""
    fs = fsspec.filesystem("http", use_listings_cache=True, listings_expiry_time=0.3)
    pattern = server + "/index/*"
    expected = [server + "/index/realfile"]

    # Nothing has been listed yet, so the cache starts out empty.
    assert not fs.dircache

    # The first listing populates the cache as a side effect.
    assert fs.glob(pattern) == expected
    assert len(fs.dircache) == 1

    # The cached entry is keyed by the directory URL and holds one item.
    assert server + "/index/" in fs.dircache
    assert len(fs.dircache.get(server + "/index/")) == 1

    # Sleep slightly longer than the expiry time.
    time.sleep(0.31)

    # The entry must no longer be retrievable.
    entry = fs.dircache.get(server + "/index/")
    assert entry is None

    # A new glob repopulates the cache.
    assert fs.glob(pattern) == expected
    entry = fs.dircache.get(server + "/index/")
    assert len(entry) == 1
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def test_list_cache_reuse(server):
    """Check which filesystem instances observe a previously filled ``dircache``,
    depending on the caching arguments they are created with."""
    fs = fsspec.filesystem("http", use_listings_cache=True, listings_expiry_time=5)

    # Nothing has been listed yet, so the cache starts out empty.
    assert not fs.dircache

    # The first listing populates the cache as a side effect.
    listing = fs.glob(server + "/index/*")
    assert listing == [server + "/index/realfile"]

    # Exactly one directory entry is now cached.
    assert len(fs.dircache) == 1

    # An instance created with caching disabled shows no cache content.
    fs = fsspec.filesystem("http", use_listings_cache=False)
    assert not fs.dircache

    # An instance created with the original arguments sees the cached entry.
    fs = fsspec.filesystem("http", use_listings_cache=True, listings_expiry_time=5)
    assert len(fs.dircache) == 1

    # An instance with a different expiry time has no valid cache content.
    fs = fsspec.filesystem("http", use_listings_cache=True, listings_expiry_time=666)
    assert len(fs.dircache) == 0
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def test_ls_raises_filenotfound(server):
    """Listing ``/not-a-key`` raises FileNotFoundError."""
    fs = fsspec.filesystem("http")
    missing = server + "/not-a-key"

    with pytest.raises(FileNotFoundError):
        fs.ls(missing)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_list_cache_with_max_paths(server):
    """Globbing returns the expected listing with ``max_paths=5`` and listing
    caching enabled."""
    fs = fsspec.filesystem("http", use_listings_cache=True, max_paths=5)
    expected = [server + "/index/realfile"]
    assert fs.glob(server + "/index/*") == expected
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def test_list_cache_with_skip_instance_cache(server):
    """Globbing returns the expected listing with ``skip_instance_cache=True``
    and listing caching enabled."""
    fs = fsspec.filesystem("http", use_listings_cache=True, skip_instance_cache=True)
    expected = [server + "/index/realfile"]
    assert fs.glob(server + "/index/*") == expected
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def test_glob_return_subfolders(server):
    """Globbing ``/simple/*`` returns both the sub-directory and the file."""
    fs = fsspec.filesystem("http")
    found = fs.glob(server + "/simple/*")
    expected = {server + "/simple/dir/", server + "/simple/file"}
    assert set(found) == expected
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def test_isdir(server):
    """``isdir`` is true for the index URL and false for the file URL and for
    a non-existent location."""
    fs = fsspec.filesystem("http")
    index_is_dir = fs.isdir(server + "/index/")
    assert index_is_dir
    file_is_dir = fs.isdir(server + "/index/realfile")
    assert not file_is_dir
    # NOTE(review): no "/" separates the server address from the name here;
    # kept exactly as in the original.
    bogus_is_dir = fs.isdir(server + "doesnotevenexist")
    assert not bogus_is_dir
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def test_policy_arg(server):
    """Globbing returns the expected listing with ``size_policy="get"``."""
    fs = fsspec.filesystem("http", size_policy="get")
    expected = [server + "/index/realfile"]
    assert fs.glob(server + "/index/*") == expected
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def test_exists(server):
    """A missing URL reports ``exists() == False`` and ``cat`` raises
    FileNotFoundError for it."""
    fs = fsspec.filesystem("http")
    missing = server + "/notafile"
    assert not fs.exists(missing)
    with pytest.raises(FileNotFoundError):
        fs.cat(missing)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def test_read(server):
    """Read the whole file three ways: in one call, with ``block_size=0``, and
    as a 100-byte read followed by the remainder."""
    fs = fsspec.filesystem("http")
    url = server + "/index/realfile"

    with fs.open(url, "rb") as handle:
        assert handle.read() == data

    with fs.open(url, "rb", block_size=0) as handle:
        assert handle.read() == data

    with fs.open(url, "rb") as handle:
        head = handle.read(100)
        tail = handle.read()
        assert head + tail == data
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def test_file_pickle(server):
    """Round-trip open file objects through pickle and check that both the
    original and the restored object read the full payload."""
    import pickle

    url = server + "/index/realfile"

    # via HTTPFile
    fs = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true"})

    with fsspec.open(url, headers={"give_length": "true", "head_ok": "true"}) as f:
        restored = pickle.loads(pickle.dumps(f))
        assert restored.read() == data

    with fs.open(url, "rb") as f:
        blob = pickle.dumps(f)
        assert f.read() == data
    with pickle.loads(blob) as f:
        assert f.read() == data

    # via HTTPStreamFile
    fs = fsspec.filesystem("http")
    with fs.open(url, "rb") as f:
        blob = pickle.dumps(f)
        assert f.read() == data
    with pickle.loads(blob) as f:
        assert f.read() == data
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def test_methods(server):
    """``exists`` is true for the real file and ``cat`` returns its content."""
    fs = fsspec.filesystem("http")
    target = server + "/index/realfile"
    assert fs.exists(target)
    content = fs.cat(target)
    assert content == data
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
@pytest.mark.parametrize(
    "headers",
    [
        {},
        {"give_length": "true"},
        {"give_length": "true", "head_ok": "true"},
        {"give_range": "true"},
        {"give_length": "true", "head_not_auth": "true"},
        {"give_range": "true", "head_not_auth": "true"},
        {"use_206": "true", "head_ok": "true", "head_give_length": "true"},
        {"use_206": "true", "give_length": "true"},
        {"use_206": "true", "give_range": "true"},
    ],
)
def test_random_access(server, headers):
    """With any headers set, the file reports its size and supports a relative
    seek; with no headers, a relative seek raises ValueError."""
    fs = fsspec.filesystem("http", headers=headers)
    target = server + "/index/realfile"
    with fs.open(target, "rb") as f:
        if headers:
            assert f.size == len(data)
        first = f.read(5)
        assert first == data[:5]

        if not headers:
            with pytest.raises(ValueError):
                f.seek(5, 1)
        else:
            # Skip five bytes relative to the current position (5 -> 10).
            f.seek(5, 1)
            assert f.read(5) == data[10:15]
    assert f.closed
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
@pytest.mark.parametrize(
    "headers",
    [
        {"ignore_range": "true", "head_ok": "true", "head_give_length": "true"},
        {"ignore_range": "true", "give_length": "true"},
        {"ignore_range": "true", "give_range": "true"},
    ],
)
def test_no_range_support(server, headers):
    """With ``ignore_range`` set, reading after a non-zero seek raises
    ValueError while reading from offset zero still works."""
    fs = fsspec.filesystem("http", headers=headers)
    target = server + "/index/realfile"
    with fs.open(target, "rb") as f:
        # A read at a non-zero offset cannot be served without Range support.
        f.seek(5)
        with pytest.raises(ValueError):
            f.read(10)

        # Rewinding to the start makes reads succeed again.
        f.seek(0)
        prefix = f.read(10)
        assert prefix == data[:10]
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def test_stream_seek(server):
    """Seeks to the current position are accepted, and two consecutive reads
    reassemble the full payload."""
    fs = fsspec.filesystem("http")
    target = server + "/index/realfile"
    with fs.open(target, "rb") as f:
        f.seek(0)  # is OK
        head = f.read(5)
        assert len(head) == 5
        # Absolute and relative seeks that land on the current offset.
        f.seek(5)
        f.seek(0, 1)
        tail = f.read()
        assert head + tail == data
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def test_mapper_url(server):
    """Mappers built via the filesystem and via ``fsspec.get_mapper`` both have
    an ``http:`` root and list the same keys."""
    index_url = server + "/index/"

    fs = fsspec.filesystem("http")
    via_fs = fs.get_mapper(index_url)
    assert via_fs.root.startswith("http:")
    assert list(via_fs)

    via_module = fsspec.get_mapper(index_url)
    assert via_module.root.startswith("http:")
    assert list(via_fs) == list(via_module)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def test_content_length_zero(server):
    """The full payload is still read when the ``zero_length`` header is sent
    together with ``give_length``."""
    request_headers = {"give_length": "true", "zero_length": "true"}
    fs = fsspec.filesystem("http", headers=request_headers)
    target = server + "/index/realfile"

    with fs.open(target, "rb") as f:
        content = f.read()
        assert content == data
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def test_content_encoding_gzip(server):
    """With the ``gzip_encoding`` header the file opens as an HTTPStreamFile of
    unknown size and still reads the full payload."""
    request_headers = {"give_length": "true", "gzip_encoding": "true"}
    fs = fsspec.filesystem("http", headers=request_headers)
    target = server + "/index/realfile"

    with fs.open(target, "rb") as f:
        assert isinstance(f, HTTPStreamFile)
        assert f.size is None
        content = f.read()
        assert content == data
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def test_download(server, tmpdir):
    """Download one remote file with ``get`` and compare the bytes on disk.

    Parameters
    ----------
    server : str
        Base URL of the test HTTP server (fixture).
    tmpdir :
        pytest temporary directory (fixture) that receives the download.
    """
    h = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "})
    url = server + "/index/realfile"
    fn = os.path.join(tmpdir, "afile")
    h.get(url, fn)
    # Read through a context manager so the handle is closed deterministically
    # instead of being left to the garbage collector (ResourceWarning, and
    # locked files on Windows).
    with open(fn, "rb") as f:
        assert f.read() == data
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def test_multi_download(server, tmpdir):
    """Download two remote files in one ``get`` call and compare both results.

    Parameters
    ----------
    server : str
        Base URL of the test HTTP server (fixture).
    tmpdir :
        pytest temporary directory (fixture) that receives the downloads.
    """
    h = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "})
    urla = server + "/index/realfile"
    urlb = server + "/index/otherfile"
    fna = os.path.join(tmpdir, "afile")
    fnb = os.path.join(tmpdir, "bfile")
    h.get([urla, urlb], [fna, fnb])
    # Use context managers so neither handle is leaked; the original left both
    # files open until garbage collection.
    with open(fna, "rb") as f:
        assert f.read() == data
    with open(fnb, "rb") as f:
        assert f.read() == data
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def test_ls(server):
    """The ``/data/20020401/`` listing has 11 entries, all of type ``file``,
    and exactly one of them matches ``*.nc4``."""
    fs = fsspec.filesystem("http")
    folder = server + "/data/20020401/"
    names = fs.ls(folder, detail=False)
    nc = server + "/data/20020401/GRACEDADM_CLSM0125US_7D.A20020401.030.nc4"
    assert nc in names
    assert len(names) == 11
    details = fs.ls(folder)
    assert all(entry["type"] == "file" for entry in details)
    assert fs.glob(server + "/data/20020401/*.nc4") == [nc]
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def test_mcat(server):
    """``cat`` on a list of two URLs returns a dict mapping each URL to the
    payload."""
    fs = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "})
    first = server + "/index/realfile"
    second = server + "/index/otherfile"
    fetched = fs.cat([first, second])
    assert fetched == {first: data, second: data}
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def test_cat_file_range(server):
    """``cat`` with ``start``/``end`` matches the equivalent Python slice,
    including negative offsets."""
    fs = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "})
    target = server + "/index/realfile"

    # Positive offsets.
    assert fs.cat(target, start=1, end=10) == data[1:10]
    assert fs.cat(target, start=1) == data[1:]

    # Negative start, with and without a negative end.
    assert fs.cat(target, start=-10) == data[-10:]
    assert fs.cat(target, start=-10, end=-2) == data[-10:-2]

    # Negative end only.
    assert fs.cat(target, end=-10) == data[:-10]
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def test_cat_file_range_numpy(server):
    """Range reads accept numpy integers and arrays as offsets; skipped when
    numpy is not importable."""
    np = pytest.importorskip("numpy")
    fs = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "})
    target = server + "/index/realfile"

    single = fs.cat(target, start=np.int8(1), end=np.int8(10))
    assert single == data[1:10]

    several = fs.cat_ranges(
        [target, target], starts=np.array([1, 5]), ends=np.array([10, 15])
    )
    assert several == [data[1:10], data[5:15]]
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def test_mcat_cache(server):
    """``cat`` on two URLs through a ``simplecache`` filesystem over http
    returns both payloads."""
    first = server + "/index/realfile"
    second = server + "/index/otherfile"
    cached_fs = fsspec.filesystem("simplecache", target_protocol="http")
    fetched = cached_fs.cat([first, second])
    assert fetched == {first: data, second: data}
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def test_mcat_expand(server):
    """``cat`` on a glob pattern returns a dict keyed by the matched URL."""
    fs = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "})
    fetched = fs.cat(server + "/index/*")
    expected = {server + "/index/realfile": data}
    assert fetched == expected
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def test_info(server):
    """``info`` exposes the ETag, the mimetype and the post-redirect URL,
    depending on the request headers used."""
    target = server + "/index/realfile"

    # ETag is reported when the server is asked to send one.
    fs = fsspec.filesystem("http", headers={"give_etag": "true", "head_ok": "true"})
    details = fs.info(target)
    assert details["ETag"] == "xxx"

    # The mimetype is reported under the "mimetype" key.
    fs = fsspec.filesystem("http", headers={"give_mimetype": "true"})
    details = fs.info(target)
    assert details["mimetype"] == "text/html"

    # For a redirecting URL, "url" holds the final location.
    fs = fsspec.filesystem("http", headers={"redirect": "true"})
    details = fs.info(server + "/redirectme")
    assert details["url"] == server + "/index/realfile"
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
@pytest.mark.parametrize("method", ["POST", "PUT"])
def test_put_file(server, tmp_path, method, reset_files):
    """Upload from a path, from an open file and from a BytesIO object with
    the given HTTP method, then read each upload back."""
    upload = tmp_path / "file_1"
    upload.write_bytes(data)

    download = tmp_path / "down_1"

    fs = fsspec.filesystem("http", headers={"head_ok": "true", "give_length": "true"})
    remote = server + "/hey"

    # The target does not exist before the upload.
    with pytest.raises(FileNotFoundError):
        fs.info(remote)

    # Upload from a filesystem path and check the reported size.
    fs.put_file(upload, remote, method=method)
    assert fs.info(remote)["size"] == len(data)

    # Download it again and compare the bytes.
    fs.get_file(remote, download)
    assert download.read_bytes() == data

    # Upload from an already-open binary file object.
    upload.write_bytes(b"xxx")
    with open(upload, "rb") as stream:
        fs.put_file(stream, server + "/hey_2", method=method)
    assert fs.cat(server + "/hey_2") == b"xxx"

    # Upload from an in-memory buffer.
    buffer = io.BytesIO(b"yyy")
    fs.put_file(buffer, server + "/hey_3", method=method)
    assert fs.cat(server + "/hey_3") == b"yyy"
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
async def get_aiohttp():
    """Create and return a new ``aiohttp.ClientSession``."""
    from aiohttp import ClientSession

    session = ClientSession()
    return session
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
async def get_proxy():
    """Return an instance of an empty, locally defined ``ProxyClient`` class."""

    class ProxyClient:
        pass

    client = ProxyClient()
    return client
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
@pytest.mark.xfail(
    condition=sys.flags.optimize > 1, reason="no docstrings when optimised"
)
def test_docstring():
    """The generated ``pipe`` method carries a non-empty docstring."""
    fs = fsspec.filesystem("http")
    # Most methods have empty docstrings and inherit from the base class;
    # this one is generated.
    doc = fs.pipe.__doc__
    assert doc
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def test_async_other_thread(server):
    """Drive an asynchronous HTTP filesystem from an event loop that runs in a
    separate daemon thread.

    Coroutines are submitted with ``asyncio.run_coroutine_threadsafe`` and
    their results are awaited from the test thread.

    Parameters
    ----------
    server : str
        Base URL of the test HTTP server (fixture).
    """
    import threading

    # Create a dedicated loop: calling asyncio.get_event_loop() with no running
    # loop is deprecated and depends on whatever loop state earlier tests left
    # behind in this thread.
    loop = asyncio.new_event_loop()
    th = threading.Thread(target=loop.run_forever)

    th.daemon = True
    th.start()
    try:
        fs = fsspec.filesystem("http", asynchronous=True, loop=loop)
        asyncio.run_coroutine_threadsafe(fs.set_session(), loop=loop).result()
        url = server + "/index/realfile"
        cor = fs._cat([url])
        fut = asyncio.run_coroutine_threadsafe(cor, loop=loop)
        assert fut.result() == {url: data}
    finally:
        # Stop the loop and wait for its thread even when an assertion fails,
        # so no loop is left running behind the test.
        loop.call_soon_threadsafe(loop.stop)
        th.join()
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def test_async_this_thread(server):
    """Inside a running loop the blocking ``cat`` is rejected, while awaiting
    ``_cat`` returns the payload."""

    async def scenario():
        fs = fsspec.filesystem("http", asynchronous=True)

        session = await fs.set_session()  # creates client

        target = server + "/index/realfile"
        # The synchronous API must refuse to run here.
        with pytest.raises((NotImplementedError, RuntimeError)):
            fs.cat([target])
        fetched = await fs._cat([target])
        del fs
        assert fetched == {target: data}
        await session.close()

    asyncio.run(scenario())
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def _inner_pass(fs, q, fn):
    """Process target: fetch ``fn`` with a newly requested http filesystem and
    put the bytes, or a formatted traceback on failure, onto ``q``.

    The ``fs`` argument is received but deliberately not used; in the new
    process the instance cache should be skipped so a new instance is made.
    """
    import traceback

    try:
        fresh = fsspec.filesystem("http")
        payload = fresh.cat(fn)
        q.put(payload)
    except Exception:
        q.put(traceback.format_exc())
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
@pytest.mark.parametrize("method", ["spawn", "forkserver"])
def test_processes(server, method):
    """A child process started with the given method fetches the same bytes
    as the parent process."""
    import multiprocessing as mp

    if method != "spawn" and win:
        pytest.skip("Windows can only spawn")
    context = mp.get_context(method)
    target = server + "/index/realfile"
    fs = fsspec.filesystem("http")

    results = context.Queue()
    child = context.Process(target=_inner_pass, args=(fs, results, target))
    child.start()
    from_child = results.get()
    assert from_child == fs.cat(target)
    child.join()
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
@pytest.mark.parametrize("get_client", [get_aiohttp, get_proxy])
def test_close(get_client):
    """``close_session`` accepts both a real aiohttp session and a bare proxy
    object, with ``None`` as the loop argument."""
    fs = fsspec.filesystem("http", skip_instance_cache=True)
    client = asyncio.run(get_client())
    fs.close_session(None, client)
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
@pytest.mark.asyncio
async def test_async_file(server):
    """A file from ``open_async`` can be read in two awaited chunks that
    together equal the payload."""
    fs = fsspec.filesystem("http", asynchronous=True, skip_instance_cache=True)
    target = server + "/index/realfile"
    opened = await fs.open_async(target)
    async with opened as f:
        head = await f.read(10)
        assert data.startswith(head)
        tail = await f.read()
        assert data == head + tail
    await fs._session.close()
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def test_encoded(server):
    """Check the request path the server reports for ``encoded=True`` and
    ``encoded=False`` filesystems."""
    echo_path = {"give_path": "true"}

    # encoded=True: a pre-quoted URL is echoed back unchanged, while a raw URL
    # raises a client error.
    fs = fsspec.filesystem("http", encoded=True)
    reply = fs.cat(server + "/Hello%3A%20G%C3%BCnter", headers=echo_path)
    assert json.loads(reply)["path"] == "/Hello%3A%20G%C3%BCnter"
    with pytest.raises(aiohttp.client_exceptions.ClientError):
        fs.cat(server + "/Hello: Günter", headers={"give_path": "true"})

    # encoded=False: the raw URL is accepted and arrives quoted at the server.
    fs = fsspec.filesystem("http", encoded=False)
    reply = fs.cat(server + "/Hello: Günter", headers={"give_path": "true"})
    assert json.loads(reply)["path"] == "/Hello:%20G%C3%BCnter"
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def test_with_cache(server):
    """Reading through a ``blockcache`` wrapper gives the same bytes via
    ``open`` and via ``cat``."""
    http_fs = fsspec.filesystem(
        "http", headers={"head_ok": "true", "give_length": "true"}
    )
    target = server + "/index/realfile"
    cached_fs = fsspec.filesystem("blockcache", fs=http_fs)
    with cached_fs.open(target, "rb") as f:
        via_open = f.read()
    assert via_open == cached_fs.cat(target)
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
@pytest.mark.asyncio
async def test_async_expand_path(server):
    """``_expand_path`` returns the directory and its file for ``maxdepth=1``
    and raises ValueError for ``maxdepth=0``."""
    fs = fsspec.filesystem("http", asynchronous=True, skip_instance_cache=True)
    root = server + "/index"

    # maxdepth=1
    expanded = await fs._expand_path(root, recursive=True, maxdepth=1)
    assert expanded == [server + "/index", server + "/index/realfile"]

    # maxdepth=0
    with pytest.raises(ValueError):
        await fs._expand_path(root, maxdepth=0)
    with pytest.raises(ValueError):
        await fs._expand_path(root, recursive=True, maxdepth=0)

    await fs._session.close()
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
@pytest.mark.asyncio
async def test_async_walk(server):
    """``_walk`` yields a single ``(root, dirs, files)`` triple for the index
    and raises ValueError for ``maxdepth=0``."""
    fs = fsspec.filesystem("http", asynchronous=True, skip_instance_cache=True)
    root = server + "/index"

    # No maxdepth
    walked = [step async for step in fs._walk(root)]
    assert walked == [(server + "/index", [], ["realfile"])]

    # maxdepth=0
    with pytest.raises(ValueError):
        async for _ in fs._walk(root, maxdepth=0):
            pass

    await fs._session.close()
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shlex
|
|
3
|
+
import subprocess
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
import fsspec
|
|
9
|
+
|
|
10
|
+
pytest.importorskip("notebook")
|
|
11
|
+
requests = pytest.importorskip("requests")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest.fixture()
def jupyter(tmpdir):
    """Start a Jupyter notebook server on port 5566 rooted at ``tmpdir``.

    Yields
    ------
    tuple
        ``(url, tmpdir)``, where ``url`` includes the access token and
        ``tmpdir`` is the notebook directory as a string.

    The test is skipped when the ``jupyter`` executable cannot be started and
    marked xfail when the server does not answer within about 15 seconds.
    """
    tmpdir = str(tmpdir)
    os.environ["JUPYTER_TOKEN"] = "blah"
    try:
        cmd = f'jupyter notebook --notebook-dir="{tmpdir}" --no-browser --port=5566'
        P = subprocess.Popen(shlex.split(cmd))
    except FileNotFoundError:
        pytest.skip("notebook not installed correctly")
    try:
        # Use a wall-clock deadline so time spent inside each request counts
        # toward the limit, instead of only counting the sleeps.
        deadline = time.monotonic() + 15
        while True:
            try:
                # Bound each probe so a half-open socket cannot hang the test.
                r = requests.get("http://localhost:5566/?token=blah", timeout=5)
                r.raise_for_status()
                break
            except (requests.exceptions.BaseHTTPError, OSError):
                if time.monotonic() > deadline:
                    pytest.xfail("Timed out for jupyter")
                time.sleep(0.1)
        yield "http://localhost:5566/?token=blah", tmpdir
    finally:
        P.terminate()
        # Reap the child so no zombie process or busy port outlives the test;
        # escalate to kill if it ignores the terminate request.
        try:
            P.wait(timeout=10)
        except subprocess.TimeoutExpired:
            P.kill()
            P.wait()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_simple(jupyter):
    """Write, read, stat and remove files through the ``jupyter`` filesystem
    and cross-check against the notebook directory on disk."""
    url, notebook_dir = jupyter
    fs = fsspec.filesystem("jupyter", url=url)
    # The notebook directory starts out empty.
    assert fs.ls("") == []

    # A piped file is readable and shows up on disk.
    fs.pipe("afile", b"data")
    assert fs.cat("afile") == b"data"
    assert "afile" in os.listdir(notebook_dir)

    # Write and read back through file objects.
    with fs.open("bfile", "wb") as writer:
        writer.write(b"more")
    with fs.open("bfile", "rb") as reader:
        assert reader.read() == b"more"

    assert fs.info("bfile")["size"] == 4
    fs.rm("afile")

    # The removed file is gone from disk.
    assert "afile" not in os.listdir(notebook_dir)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# this test case checks that the libarchive can be used from a seekable source (any fs
|
|
2
|
+
# with a block cache active)
|
|
3
|
+
import fsspec
|
|
4
|
+
from fsspec.implementations.tests.test_archive import archive_data, temparchive
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_cache(ftp_writable):
    """Upload an archive to the FTP test server and read one member back
    through a chained ``libarchive://...::ftp://`` URL.

    Parameters
    ----------
    ftp_writable :
        Fixture requested for the writable FTP server; its value is not used
        here because the connection details are hard-coded below.
    """
    host, port, username, password = "localhost", 2121, "user", "pass"

    with temparchive(archive_data) as archive_file:
        # Open the local archive in a context manager so its handle is closed
        # once the upload is written, rather than leaked.
        with fsspec.open(
            "ftp:///archive.7z",
            "wb",
            host=host,
            port=port,
            username=username,
            password=password,
        ) as f, open(archive_file, "rb") as local:
            f.write(local.read())
        of = fsspec.open(
            "libarchive://deeply/nested/path::ftp:///archive.7z",
            ftp={
                "host": host,
                "port": port,
                "username": username,
                "password": password,
            },
        )

        with of as f:
            readdata = f.read()

        assert readdata == archive_data["deeply/nested/path"]
|