fsspec 2024.3.0__py3-none-any.whl → 2024.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. fsspec/__init__.py +2 -3
  2. fsspec/_version.py +14 -19
  3. fsspec/caching.py +83 -14
  4. fsspec/compression.py +1 -0
  5. fsspec/core.py +31 -6
  6. fsspec/exceptions.py +1 -0
  7. fsspec/generic.py +1 -1
  8. fsspec/gui.py +1 -1
  9. fsspec/implementations/arrow.py +0 -2
  10. fsspec/implementations/cache_mapper.py +1 -2
  11. fsspec/implementations/cache_metadata.py +7 -7
  12. fsspec/implementations/dirfs.py +2 -2
  13. fsspec/implementations/http.py +9 -9
  14. fsspec/implementations/local.py +97 -48
  15. fsspec/implementations/memory.py +9 -0
  16. fsspec/implementations/smb.py +3 -1
  17. fsspec/implementations/tests/__init__.py +0 -0
  18. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +112 -0
  19. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +582 -0
  20. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +873 -0
  21. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +458 -0
  22. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +1355 -0
  23. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +795 -0
  24. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +613 -0
  25. fsspec/implementations/tests/conftest.py +39 -0
  26. fsspec/implementations/tests/local/__init__.py +0 -0
  27. fsspec/implementations/tests/local/local_fixtures.py +18 -0
  28. fsspec/implementations/tests/local/local_test.py +14 -0
  29. fsspec/implementations/tests/memory/__init__.py +0 -0
  30. fsspec/implementations/tests/memory/memory_fixtures.py +27 -0
  31. fsspec/implementations/tests/memory/memory_test.py +14 -0
  32. fsspec/implementations/tests/out.zip +0 -0
  33. fsspec/implementations/tests/test_archive.py +382 -0
  34. fsspec/implementations/tests/test_arrow.py +259 -0
  35. fsspec/implementations/tests/test_cached.py +1306 -0
  36. fsspec/implementations/tests/test_common.py +35 -0
  37. fsspec/implementations/tests/test_dask.py +29 -0
  38. fsspec/implementations/tests/test_data.py +20 -0
  39. fsspec/implementations/tests/test_dbfs.py +268 -0
  40. fsspec/implementations/tests/test_dirfs.py +588 -0
  41. fsspec/implementations/tests/test_ftp.py +178 -0
  42. fsspec/implementations/tests/test_git.py +76 -0
  43. fsspec/implementations/tests/test_http.py +577 -0
  44. fsspec/implementations/tests/test_jupyter.py +57 -0
  45. fsspec/implementations/tests/test_libarchive.py +33 -0
  46. fsspec/implementations/tests/test_local.py +1285 -0
  47. fsspec/implementations/tests/test_memory.py +382 -0
  48. fsspec/implementations/tests/test_reference.py +720 -0
  49. fsspec/implementations/tests/test_sftp.py +233 -0
  50. fsspec/implementations/tests/test_smb.py +139 -0
  51. fsspec/implementations/tests/test_tar.py +243 -0
  52. fsspec/implementations/tests/test_webhdfs.py +197 -0
  53. fsspec/implementations/tests/test_zip.py +134 -0
  54. fsspec/implementations/webhdfs.py +1 -3
  55. fsspec/mapping.py +2 -2
  56. fsspec/parquet.py +0 -8
  57. fsspec/registry.py +4 -0
  58. fsspec/spec.py +21 -4
  59. fsspec/tests/__init__.py +0 -0
  60. fsspec/tests/abstract/mv.py +57 -0
  61. fsspec/tests/conftest.py +188 -0
  62. fsspec/tests/data/listing.html +1 -0
  63. fsspec/tests/test_api.py +498 -0
  64. fsspec/tests/test_async.py +230 -0
  65. fsspec/tests/test_caches.py +255 -0
  66. fsspec/tests/test_callbacks.py +89 -0
  67. fsspec/tests/test_compression.py +164 -0
  68. fsspec/tests/test_config.py +129 -0
  69. fsspec/tests/test_core.py +466 -0
  70. fsspec/tests/test_downstream.py +40 -0
  71. fsspec/tests/test_file.py +200 -0
  72. fsspec/tests/test_fuse.py +147 -0
  73. fsspec/tests/test_generic.py +90 -0
  74. fsspec/tests/test_gui.py +23 -0
  75. fsspec/tests/test_mapping.py +228 -0
  76. fsspec/tests/test_parquet.py +140 -0
  77. fsspec/tests/test_registry.py +134 -0
  78. fsspec/tests/test_spec.py +1167 -0
  79. fsspec/tests/test_utils.py +478 -0
  80. fsspec/utils.py +0 -2
  81. fsspec-2024.5.0.dist-info/METADATA +273 -0
  82. fsspec-2024.5.0.dist-info/RECORD +111 -0
  83. {fsspec-2024.3.0.dist-info → fsspec-2024.5.0.dist-info}/WHEEL +1 -2
  84. fsspec-2024.3.0.dist-info/METADATA +0 -167
  85. fsspec-2024.3.0.dist-info/RECORD +0 -54
  86. fsspec-2024.3.0.dist-info/top_level.txt +0 -1
  87. {fsspec-2024.3.0.dist-info → fsspec-2024.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,577 @@
1
+ import asyncio
2
+ import io
3
+ import json
4
+ import os
5
+ import sys
6
+ import time
7
+
8
+ import aiohttp
9
+ import pytest
10
+
11
+ import fsspec.asyn
12
+ import fsspec.utils
13
+ from fsspec.implementations.http import HTTPStreamFile
14
+ from fsspec.tests.conftest import data, reset_files, server, win # noqa: F401
15
+
16
+
17
def test_list(server):
    """Globbing ``/index/*`` returns exactly the ``realfile`` URL."""
    fs = fsspec.filesystem("http")
    expected = [f"{server}/index/realfile"]
    assert fs.glob(f"{server}/index/*") == expected
21
+
22
+
23
def test_list_invalid_args(server):
    """Creating and using a filesystem with an unknown keyword raises ``TypeError``."""
    pattern = f"{server}/index/*"
    with pytest.raises(TypeError):
        fs = fsspec.filesystem("http", use_foobar=True)
        fs.glob(pattern)
27
+
28
+
29
def test_list_cache(server):
    """Globbing gives the expected result when the listings cache is enabled."""
    fs = fsspec.filesystem("http", use_listings_cache=True)
    expected = [f"{server}/index/realfile"]
    assert fs.glob(f"{server}/index/*") == expected
33
+
34
+
35
def test_list_cache_with_expiry_time_cached(server):
    """With an expiry time of 30, a repeated glob still returns the same listing."""
    fs = fsspec.filesystem("http", use_listings_cache=True, listings_expiry_time=30)
    pattern = f"{server}/index/*"
    expected = [f"{server}/index/realfile"]

    # The directory cache starts out empty.
    assert not fs.dircache

    # Globbing with "use_listings_cache=True" populates the cache.
    assert fs.glob(pattern) == expected

    # Exactly one entry is now cached.
    assert len(fs.dircache) == 1

    # A second glob yields the same result.
    assert fs.glob(pattern) == expected
51
+
52
+
53
def test_list_cache_with_expiry_time_purged(server):
    """A cached listing disappears after the expiry time and can be rebuilt."""
    fs = fsspec.filesystem("http", use_listings_cache=True, listings_expiry_time=0.3)
    index_dir = f"{server}/index/"
    pattern = f"{server}/index/*"
    expected = [f"{server}/index/realfile"]

    # The directory cache starts out empty.
    assert not fs.dircache

    # Globbing with "use_listings_cache=True" populates the cache.
    assert fs.glob(pattern) == expected
    assert len(fs.dircache) == 1

    # The cached entry is keyed by the directory URL and holds one item.
    assert index_dir in fs.dircache
    assert len(fs.dircache.get(index_dir)) == 1

    # Sleep just past the 0.3 expiry time.
    time.sleep(0.31)

    # The entry is no longer retrievable.
    assert fs.dircache.get(index_dir) is None

    # Listing again repopulates the cache entry.
    assert fs.glob(pattern) == expected
    assert len(fs.dircache.get(index_dir)) == 1
82
+
83
+
84
def test_list_cache_reuse(server):
    """Cache content is seen again only by instances created with the same options."""
    pattern = f"{server}/index/*"
    caching = fsspec.filesystem("http", use_listings_cache=True, listings_expiry_time=5)

    # The directory cache starts out empty.
    assert not caching.dircache

    # Globbing with "use_listings_cache=True" populates the cache.
    assert caching.glob(pattern) == [f"{server}/index/realfile"]
    assert len(caching.dircache) == 1

    # An instance created with caching disabled has no cache content.
    uncached = fsspec.filesystem("http", use_listings_cache=False)
    assert not uncached.dircache

    # Requesting the original options again shows the populated cache.
    same_options = fsspec.filesystem(
        "http", use_listings_cache=True, listings_expiry_time=5
    )
    assert len(same_options.dircache) == 1

    # A different expiry time gives an instance without any cached content.
    other_expiry = fsspec.filesystem(
        "http", use_listings_cache=True, listings_expiry_time=666
    )
    assert len(other_expiry.dircache) == 0
111
+
112
+
113
def test_ls_raises_filenotfound(server):
    """Listing a URL the server does not know raises ``FileNotFoundError``."""
    fs = fsspec.filesystem("http")
    missing = f"{server}/not-a-key"
    with pytest.raises(FileNotFoundError):
        fs.ls(missing)
118
+
119
+
120
def test_list_cache_with_max_paths(server):
    """Globbing gives the expected result when ``max_paths=5`` is passed."""
    fs = fsspec.filesystem("http", use_listings_cache=True, max_paths=5)
    expected = [f"{server}/index/realfile"]
    assert fs.glob(f"{server}/index/*") == expected
124
+
125
+
126
def test_list_cache_with_skip_instance_cache(server):
    """Globbing gives the expected result when ``skip_instance_cache=True`` is passed."""
    fs = fsspec.filesystem("http", use_listings_cache=True, skip_instance_cache=True)
    expected = [f"{server}/index/realfile"]
    assert fs.glob(f"{server}/index/*") == expected
130
+
131
+
132
def test_glob_return_subfolders(server):
    """Globbing ``/simple/*`` returns both the sub-directory and the file entry."""
    fs = fsspec.filesystem("http")
    found = fs.glob(f"{server}/simple/*")
    expected = {f"{server}/simple/dir/", f"{server}/simple/file"}
    assert set(found) == expected
139
+
140
+
141
def test_isdir(server):
    """``isdir`` is true for the index directory and false for the other URLs."""
    fs = fsspec.filesystem("http")
    assert fs.isdir(f"{server}/index/")
    assert not fs.isdir(f"{server}/index/realfile")
    # NOTE(review): no "/" separates the server address from the name here;
    # presumably a deliberately malformed URL -- confirm before changing.
    assert not fs.isdir(f"{server}doesnotevenexist")
146
+
147
+
148
def test_policy_arg(server):
    """Globbing gives the expected result when ``size_policy="get"`` is passed."""
    fs = fsspec.filesystem("http", size_policy="get")
    expected = [f"{server}/index/realfile"]
    assert fs.glob(f"{server}/index/*") == expected
152
+
153
+
154
def test_exists(server):
    """A missing URL reports ``exists() == False`` and ``cat`` raises for it."""
    fs = fsspec.filesystem("http")
    missing = f"{server}/notafile"
    assert not fs.exists(missing)
    with pytest.raises(FileNotFoundError):
        fs.cat(missing)
159
+
160
+
161
def test_read(server):
    """The file reads back as ``data`` with default and zero block sizes."""
    fs = fsspec.filesystem("http")
    url = f"{server}/index/realfile"

    # Whole-file read, first with default options and then with block_size=0.
    for open_kwargs in ({}, {"block_size": 0}):
        with fs.open(url, "rb", **open_kwargs) as f:
            assert f.read() == data

    # A partial read followed by reading the remainder gives the full content.
    with fs.open(url, "rb") as f:
        head = f.read(100)
        tail = f.read()
        assert head + tail == data
170
+
171
+
172
def test_file_pickle(server):
    """Open file objects survive a pickle round trip and still read ``data``."""
    import pickle

    url = f"{server}/index/realfile"

    # via HTTPFile
    sized_fs = fsspec.filesystem(
        "http", headers={"give_length": "true", "head_ok": "true"}
    )

    with fsspec.open(url, headers={"give_length": "true", "head_ok": "true"}) as f:
        clone = pickle.loads(pickle.dumps(f))
        assert clone.read() == data

    with sized_fs.open(url, "rb") as f:
        payload = pickle.dumps(f)
        assert f.read() == data
    with pickle.loads(payload) as restored:
        assert restored.read() == data

    # via HTTPStreamFile
    stream_fs = fsspec.filesystem("http")
    with stream_fs.open(url, "rb") as f:
        payload = pickle.dumps(f)
        assert f.read() == data
    with pickle.loads(payload) as restored:
        assert restored.read() == data
197
+
198
+
199
def test_methods(server):
    """``exists`` and ``cat`` both work on the real file URL."""
    fs = fsspec.filesystem("http")
    target = f"{server}/index/realfile"
    assert fs.exists(target)
    assert fs.cat(target) == data
204
+
205
+
206
@pytest.mark.parametrize(
    "headers",
    [
        {},
        {"give_length": "true"},
        {"give_length": "true", "head_ok": "true"},
        {"give_range": "true"},
        {"give_length": "true", "head_not_auth": "true"},
        {"give_range": "true", "head_not_auth": "true"},
        {"use_206": "true", "head_ok": "true", "head_give_length": "true"},
        {"use_206": "true", "give_length": "true"},
        {"use_206": "true", "give_range": "true"},
    ],
)
def test_random_access(server, headers):
    """Relative seeking works for every non-empty header set and fails for ``{}``."""
    fs = fsspec.filesystem("http", headers=headers)
    target = f"{server}/index/realfile"
    has_headers = bool(headers)
    with fs.open(target, "rb") as f:
        if has_headers:
            assert f.size == len(data)
        assert f.read(5) == data[:5]

        if not has_headers:
            # With no headers sent, a relative seek is expected to be rejected.
            with pytest.raises(ValueError):
                f.seek(5, 1)
        else:
            # Skip five bytes forward from position 5, then read bytes 10..15.
            f.seek(5, 1)
            assert f.read(5) == data[10:15]
    assert f.closed
235
+
236
+
237
@pytest.mark.parametrize(
    "headers",
    [
        {"ignore_range": "true", "head_ok": "true", "head_give_length": "true"},
        {"ignore_range": "true", "give_length": "true"},
        {"ignore_range": "true", "give_range": "true"},
    ],
)
def test_no_range_support(server, headers):
    """Reads at an offset fail when ``ignore_range`` is sent; reads from 0 still work."""
    fs = fsspec.filesystem("http", headers=headers)
    target = f"{server}/index/realfile"
    with fs.open(target, "rb") as f:
        # Random access is not possible if the server doesn't respect Range
        f.seek(5)
        with pytest.raises(ValueError):
            f.read(10)

        # Reading from the beginning should still work
        f.seek(0)
        first_ten = f.read(10)
        assert first_ten == data[:10]
257
+
258
+
259
def test_stream_seek(server):
    """Seeks that do not move the read position are accepted on the default file."""
    fs = fsspec.filesystem("http")
    target = f"{server}/index/realfile"
    with fs.open(target, "rb") as f:
        f.seek(0)  # is OK
        head = f.read(5)
        assert len(head) == 5
        # Both seeks target the current position (5) and must not disturb it.
        f.seek(5)
        f.seek(0, 1)
        tail = f.read()
        assert head + tail == data
270
+
271
+
272
def test_mapper_url(server):
    """Mappers from the filesystem and from ``fsspec.get_mapper`` list the same keys."""
    root_url = f"{server}/index/"

    via_fs = fsspec.filesystem("http").get_mapper(root_url)
    assert via_fs.root.startswith("http:")
    assert list(via_fs)

    via_module = fsspec.get_mapper(root_url)
    assert via_module.root.startswith("http:")
    assert list(via_fs) == list(via_module)
281
+
282
+
283
def test_content_length_zero(server):
    """The file still reads in full when the ``zero_length`` header is sent."""
    request_headers = {"give_length": "true", "zero_length": "true"}
    fs = fsspec.filesystem("http", headers=request_headers)
    target = f"{server}/index/realfile"

    with fs.open(target, "rb") as f:
        content = f.read()
        assert content == data
291
+
292
+
293
def test_content_encoding_gzip(server):
    """With ``gzip_encoding`` sent, the file opens as a size-less ``HTTPStreamFile``."""
    request_headers = {"give_length": "true", "gzip_encoding": "true"}
    fs = fsspec.filesystem("http", headers=request_headers)
    target = f"{server}/index/realfile"

    with fs.open(target, "rb") as f:
        assert isinstance(f, HTTPStreamFile)
        assert f.size is None
        assert f.read() == data
303
+
304
+
305
def test_download(server, tmpdir):
    """``get`` downloads the remote file to a local path with identical content."""
    h = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "})
    url = server + "/index/realfile"
    fn = os.path.join(tmpdir, "afile")
    h.get(url, fn)
    # Read through a context manager so the file handle is closed promptly.
    with open(fn, "rb") as f:
        assert f.read() == data
311
+
312
+
313
def test_multi_download(server, tmpdir):
    """``get`` with lists of URLs and paths downloads each file with identical content."""
    h = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "})
    urla = server + "/index/realfile"
    urlb = server + "/index/otherfile"
    fna = os.path.join(tmpdir, "afile")
    fnb = os.path.join(tmpdir, "bfile")
    h.get([urla, urlb], [fna, fnb])
    # Read through context managers so neither file handle is leaked.
    for local_path in (fna, fnb):
        with open(local_path, "rb") as f:
            assert f.read() == data
322
+
323
+
324
def test_ls(server):
    """The dated data directory lists eleven file entries, one of them the ``.nc4`` file."""
    fs = fsspec.filesystem("http")
    directory = f"{server}/data/20020401/"
    nc = f"{server}/data/20020401/GRACEDADM_CLSM0125US_7D.A20020401.030.nc4"

    names = fs.ls(directory, detail=False)
    assert nc in names
    assert len(names) == 11

    # The detailed listing reports every entry as a plain file.
    assert all(entry["type"] == "file" for entry in fs.ls(directory))
    assert fs.glob(f"{server}/data/20020401/*.nc4") == [nc]
332
+
333
+
334
def test_mcat(server):
    """``cat`` on a list of URLs returns a dict mapping each URL to its content."""
    fs = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "})
    urls = [f"{server}/index/realfile", f"{server}/index/otherfile"]
    expected = {urls[0]: data, urls[1]: data}
    assert fs.cat(urls) == expected
340
+
341
+
342
def test_cat_file_range(server):
    """``cat`` with ``start``/``end`` matches the equivalent slice of ``data``."""
    fs = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "})
    target = f"{server}/index/realfile"

    # Each case pairs the range keywords with the slice they should produce.
    cases = [
        ({"start": 1, "end": 10}, data[1:10]),
        ({"start": 1}, data[1:]),
        ({"start": -10}, data[-10:]),
        ({"start": -10, "end": -2}, data[-10:-2]),
        ({"end": -10}, data[:-10]),
    ]
    for range_kwargs, expected in cases:
        assert fs.cat(target, **range_kwargs) == expected
352
+
353
+
354
def test_cat_file_range_numpy(server):
    """Range reads accept numpy integers and arrays as offsets."""
    np = pytest.importorskip("numpy")
    fs = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "})
    target = f"{server}/index/realfile"

    single = fs.cat(target, start=np.int8(1), end=np.int8(10))
    assert single == data[1:10]

    starts = np.array([1, 5])
    ends = np.array([10, 15])
    ranges = fs.cat_ranges([target, target], starts=starts, ends=ends)
    assert ranges == [data[1:10], data[5:15]]
361
+
362
+
363
def test_mcat_cache(server):
    """Multi-URL ``cat`` works through a ``simplecache`` filesystem over HTTP."""
    urls = [f"{server}/index/realfile", f"{server}/index/otherfile"]
    cached_fs = fsspec.filesystem("simplecache", target_protocol="http")
    expected = {urls[0]: data, urls[1]: data}
    assert cached_fs.cat(urls) == expected
368
+
369
+
370
def test_mcat_expand(server):
    """``cat`` with a glob pattern returns a dict holding the single matching file."""
    fs = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "})
    expected = {f"{server}/index/realfile": data}
    assert fs.cat(f"{server}/index/*") == expected
374
+
375
+
376
def test_info(server):
    """``info`` exposes the ETag, the mimetype and the final URL after a redirect."""
    target = f"{server}/index/realfile"

    etag_fs = fsspec.filesystem(
        "http", headers={"give_etag": "true", "head_ok": "true"}
    )
    assert etag_fs.info(target)["ETag"] == "xxx"

    mime_fs = fsspec.filesystem("http", headers={"give_mimetype": "true"})
    assert mime_fs.info(target)["mimetype"] == "text/html"

    redirect_fs = fsspec.filesystem("http", headers={"redirect": "true"})
    assert redirect_fs.info(f"{server}/redirectme")["url"] == target
388
+
389
+
390
@pytest.mark.parametrize("method", ["POST", "PUT"])
def test_put_file(server, tmp_path, method, reset_files):
    """``put_file`` uploads from a path, an open file and an in-memory stream."""
    source = tmp_path / "file_1"
    source.write_bytes(data)
    downloaded = tmp_path / "down_1"

    fs = fsspec.filesystem("http", headers={"head_ok": "true", "give_length": "true"})
    first_url = f"{server}/hey"
    second_url = f"{server}/hey_2"
    third_url = f"{server}/hey_3"

    # The target does not exist before the upload.
    with pytest.raises(FileNotFoundError):
        fs.info(first_url)

    # Upload from a local path, then check the size and a download round trip.
    fs.put_file(source, first_url, method=method)
    assert fs.info(first_url)["size"] == len(data)

    fs.get_file(first_url, downloaded)
    assert downloaded.read_bytes() == data

    # Upload from an already-open binary file object.
    source.write_bytes(b"xxx")
    with open(source, "rb") as stream:
        fs.put_file(stream, second_url, method=method)
    assert fs.cat(second_url) == b"xxx"

    # Upload from an in-memory stream.
    fs.put_file(io.BytesIO(b"yyy"), third_url, method=method)
    assert fs.cat(third_url) == b"yyy"
414
+
415
+
416
async def get_aiohttp():
    """Create and return a new ``aiohttp.ClientSession``."""
    from aiohttp import ClientSession

    session = ClientSession()
    return session
420
+
421
+
422
async def get_proxy():
    """Return an instance of a bare, method-less stand-in client class."""

    class ProxyClient:
        pass

    client = ProxyClient()
    return client
427
+
428
+
429
@pytest.mark.xfail(
    condition=sys.flags.optimize > 1, reason="no docstrings when optimised"
)
def test_docstring():
    """The ``pipe`` method of the HTTP filesystem carries a non-empty docstring."""
    fs = fsspec.filesystem("http")
    # most methods have empty docstrings and draw from base class, but this one
    # is generated
    generated_doc = fs.pipe.__doc__
    assert generated_doc
437
+
438
+
439
def test_async_other_thread(server):
    """An asynchronous filesystem can be driven from a loop running in another thread.

    Coroutines are submitted from this thread with
    ``asyncio.run_coroutine_threadsafe`` and their results awaited synchronously.
    """
    import threading

    # Use a dedicated loop: calling ``asyncio.get_event_loop()`` without a
    # running loop is deprecated, and a private loop cannot interfere with
    # whatever loop other tests use in this thread.
    loop = asyncio.new_event_loop()
    th = threading.Thread(target=loop.run_forever, daemon=True)
    th.start()
    try:
        # The instance is bound to a loop that is closed below, so keep it
        # out of the instance cache.
        fs = fsspec.filesystem(
            "http", asynchronous=True, loop=loop, skip_instance_cache=True
        )
        session = asyncio.run_coroutine_threadsafe(
            fs.set_session(), loop=loop
        ).result()
        try:
            url = server + "/index/realfile"
            fut = asyncio.run_coroutine_threadsafe(fs._cat([url]), loop=loop)
            assert fut.result() == {url: data}
        finally:
            # Close the client session on the loop that owns it.
            asyncio.run_coroutine_threadsafe(session.close(), loop=loop).result()
    finally:
        # Always stop the loop and reap the thread, even on assertion failure.
        loop.call_soon_threadsafe(loop.stop)
        th.join(timeout=5)
        if not loop.is_running():
            loop.close()
454
+
455
+
456
def test_async_this_thread(server):
    """Inside a running loop the sync API is rejected while the coroutine API works."""

    async def _exercise():
        target = f"{server}/index/realfile"
        fs = fsspec.filesystem("http", asynchronous=True)

        session = await fs.set_session()  # creates client

        # The blocking ``cat`` must refuse to run in this context.
        with pytest.raises((NotImplementedError, RuntimeError)):
            fs.cat([target])

        result = await fs._cat([target])
        del fs
        assert result == {target: data}
        await session.close()

    asyncio.run(_exercise())
471
+
472
+
473
def _inner_pass(fs, q, fn):
    """Fetch ``fn`` with a newly requested HTTP filesystem and put the result on ``q``.

    On any exception the formatted traceback is put on the queue instead.
    """
    # pass the FS instance, but don't use it; in new process, the instance
    # cache should be skipped to make a new instance
    import traceback

    try:
        fresh_fs = fsspec.filesystem("http")
        content = fresh_fs.cat(fn)
        q.put(content)
    except Exception:
        q.put(traceback.format_exc())
483
+
484
+
485
@pytest.mark.parametrize("method", ["spawn", "forkserver"])
def test_processes(server, method):
    """A child process can fetch the same content as the parent.

    The filesystem instance is passed to the child, which reports either the
    fetched bytes or a formatted traceback through a queue.
    """
    import multiprocessing as mp

    if win and method != "spawn":
        pytest.skip("Windows can only spawn")
    ctx = mp.get_context(method)
    fn = server + "/index/realfile"
    fs = fsspec.filesystem("http")

    q = ctx.Queue()
    p = ctx.Process(target=_inner_pass, args=(fs, q, fn))
    p.start()
    try:
        out = q.get()
        assert out == fs.cat(fn)
    finally:
        # Reap the child even when the assertion fails, so it is not left behind.
        p.join()
501
+
502
+
503
@pytest.mark.parametrize("get_client", [get_aiohttp, get_proxy])
def test_close(get_client):
    """``close_session`` accepts both a real aiohttp session and a bare proxy object."""
    fs = fsspec.filesystem("http", skip_instance_cache=True)
    client = asyncio.run(get_client())
    fs.close_session(None, client)
507
+
508
+
509
@pytest.mark.asyncio
async def test_async_file(server):
    """A file from ``open_async`` supports awaited partial and full reads."""
    fs = fsspec.filesystem("http", asynchronous=True, skip_instance_cache=True)
    try:
        fn = server + "/index/realfile"
        of = await fs.open_async(fn)
        async with of as f:
            out1 = await f.read(10)
            assert data.startswith(out1)
            out2 = await f.read()
            assert data == out1 + out2
    finally:
        # Close the client session even if an assertion above failed.
        session = getattr(fs, "_session", None)
        if session is not None:
            await session.close()
520
+
521
+
522
def test_encoded(server):
    """``encoded`` controls whether URLs are passed through as already quoted."""
    quoted_url = f"{server}/Hello%3A%20G%C3%BCnter"
    raw_url = f"{server}/Hello: Günter"

    # encoded=True: the pre-quoted URL reaches the server unchanged, while an
    # unquoted URL is rejected by the client.
    strict_fs = fsspec.filesystem("http", encoded=True)
    echoed = strict_fs.cat(quoted_url, headers={"give_path": "true"})
    assert json.loads(echoed)["path"] == "/Hello%3A%20G%C3%BCnter"
    with pytest.raises(aiohttp.client_exceptions.ClientError):
        strict_fs.cat(raw_url, headers={"give_path": "true"})

    # encoded=False: the raw URL is quoted before it is sent.
    lenient_fs = fsspec.filesystem("http", encoded=False)
    echoed = lenient_fs.cat(raw_url, headers={"give_path": "true"})
    assert json.loads(echoed)["path"] == "/Hello:%20G%C3%BCnter"
532
+
533
+
534
def test_with_cache(server):
    """Reading through a ``blockcache`` wrapper matches ``cat`` on the same wrapper."""
    base_fs = fsspec.filesystem(
        "http", headers={"head_ok": "true", "give_length": "true"}
    )
    target = f"{server}/index/realfile"
    cached_fs = fsspec.filesystem("blockcache", fs=base_fs)
    with cached_fs.open(target, "rb") as f:
        content = f.read()
    assert content == cached_fs.cat(target)
541
+
542
+
543
@pytest.mark.asyncio
async def test_async_expand_path(server):
    """``_expand_path`` honours ``maxdepth=1`` and rejects ``maxdepth=0``."""
    fs = fsspec.filesystem("http", asynchronous=True, skip_instance_cache=True)
    try:
        # maxdepth=1
        assert await fs._expand_path(
            server + "/index", recursive=True, maxdepth=1
        ) == [
            server + "/index",
            server + "/index/realfile",
        ]

        # maxdepth=0
        with pytest.raises(ValueError):
            await fs._expand_path(server + "/index", maxdepth=0)
        with pytest.raises(ValueError):
            await fs._expand_path(server + "/index", recursive=True, maxdepth=0)
    finally:
        # Close the client session even if an assertion above failed.
        session = getattr(fs, "_session", None)
        if session is not None:
            await session.close()
560
+
561
+
562
@pytest.mark.asyncio
async def test_async_walk(server):
    """``_walk`` yields the index directory once and rejects ``maxdepth=0``."""
    fs = fsspec.filesystem("http", asynchronous=True, skip_instance_cache=True)
    try:
        # No maxdepth
        res = [a async for a in fs._walk(server + "/index")]
        assert res == [(server + "/index", [], ["realfile"])]

        # maxdepth=0
        with pytest.raises(ValueError):
            async for a in fs._walk(server + "/index", maxdepth=0):
                pass
    finally:
        # Close the client session even if an assertion above failed.
        session = getattr(fs, "_session", None)
        if session is not None:
            await session.close()
@@ -0,0 +1,57 @@
1
+ import os
2
+ import shlex
3
+ import subprocess
4
+ import time
5
+
6
+ import pytest
7
+
8
+ import fsspec
9
+
10
+ pytest.importorskip("notebook")
11
+ requests = pytest.importorskip("requests")
12
+
13
+
14
@pytest.fixture()
def jupyter(tmpdir):
    """Start a Jupyter notebook server on port 5566 serving ``tmpdir``.

    Yields a ``(url, directory)`` tuple. The test is skipped when the
    ``jupyter`` executable cannot be launched and marked xfail when the server
    does not answer within about 15 seconds. The server process is terminated
    and reaped on teardown.
    """
    tmpdir = str(tmpdir)
    # Hand the token to the child only, so the test process's own environment
    # is not permanently modified.
    env = {**os.environ, "JUPYTER_TOKEN": "blah"}
    cmd = f'jupyter notebook --notebook-dir="{tmpdir}" --no-browser --port=5566'
    try:
        P = subprocess.Popen(shlex.split(cmd), env=env)
    except FileNotFoundError:
        pytest.skip("notebook not installed correctly")
    try:
        # A wall-clock deadline also accounts for time spent inside requests.
        deadline = time.monotonic() + 15
        while True:
            try:
                # Bound each probe so a hung connection cannot stall the fixture.
                r = requests.get("http://localhost:5566/?token=blah", timeout=5)
                r.raise_for_status()
                break
            except (requests.exceptions.BaseHTTPError, OSError):
                if time.monotonic() > deadline:
                    pytest.xfail("Timed out for jupyter")
                time.sleep(0.1)
        yield "http://localhost:5566/?token=blah", tmpdir
    finally:
        P.terminate()
        # Reap the child; escalate to kill if it ignores the terminate request.
        try:
            P.wait(timeout=10)
        except subprocess.TimeoutExpired:
            P.kill()
            P.wait()
38
+
39
+
40
def test_simple(jupyter):
    """Files written, read and removed via the filesystem show up in the served directory."""
    url, served_dir = jupyter
    fs = fsspec.filesystem("jupyter", url=url)

    # The served directory starts out empty.
    assert fs.ls("") == []

    # Bytes written with ``pipe`` are readable and appear on local disk.
    fs.pipe("afile", b"data")
    assert fs.cat("afile") == b"data"
    assert "afile" in os.listdir(served_dir)

    # Round trip through the file-object API.
    with fs.open("bfile", "wb") as writer:
        writer.write(b"more")
    with fs.open("bfile", "rb") as reader:
        assert reader.read() == b"more"

    assert fs.info("bfile")["size"] == 4

    # Removing through the filesystem removes the local file as well.
    fs.rm("afile")
    assert "afile" not in os.listdir(served_dir)
@@ -0,0 +1,33 @@
1
+ # this test case checks that the libarchive can be used from a seekable source (any fs
2
+ # with a block cache active)
3
+ import fsspec
4
+ from fsspec.implementations.tests.test_archive import archive_data, temparchive
5
+
6
+
7
def test_cache(ftp_writable):
    """A member of an archive stored on FTP can be read through ``libarchive://``.

    The archive is uploaded to the FTP server first, then one nested member is
    read back via a chained URL and compared with the source data.
    """
    # NOTE(review): the fixture's value is not used; these literals presumably
    # mirror what ``ftp_writable`` provides -- confirm against the fixture.
    host, port, username, password = "localhost", 2121, "user", "pass"

    with temparchive(archive_data) as archive_file:
        # Read the local archive through a context manager so the handle is closed.
        with open(archive_file, "rb") as src:
            payload = src.read()
        with fsspec.open(
            "ftp:///archive.7z",
            "wb",
            host=host,
            port=port,
            username=username,
            password=password,
        ) as f:
            f.write(payload)
        of = fsspec.open(
            "libarchive://deeply/nested/path::ftp:///archive.7z",
            ftp={
                "host": host,
                "port": port,
                "username": username,
                "password": password,
            },
        )

        with of as f:
            readdata = f.read()

        assert readdata == archive_data["deeply/nested/path"]