fsspec 2024.5.0__py3-none-any.whl → 2024.6.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (73)
  1. fsspec/_version.py +2 -2
  2. fsspec/caching.py +3 -2
  3. fsspec/compression.py +1 -1
  4. fsspec/implementations/cached.py +1 -13
  5. fsspec/implementations/github.py +12 -0
  6. fsspec/implementations/reference.py +6 -0
  7. fsspec/implementations/smb.py +10 -0
  8. fsspec/json.py +81 -0
  9. fsspec/registry.py +24 -18
  10. fsspec/spec.py +76 -34
  11. fsspec/utils.py +1 -1
  12. {fsspec-2024.5.0.dist-info → fsspec-2024.6.0.dist-info}/METADATA +11 -5
  13. fsspec-2024.6.0.dist-info/RECORD +55 -0
  14. fsspec/implementations/tests/__init__.py +0 -0
  15. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +0 -112
  16. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +0 -582
  17. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +0 -873
  18. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +0 -458
  19. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +0 -1355
  20. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +0 -795
  21. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +0 -613
  22. fsspec/implementations/tests/conftest.py +0 -39
  23. fsspec/implementations/tests/local/__init__.py +0 -0
  24. fsspec/implementations/tests/local/local_fixtures.py +0 -18
  25. fsspec/implementations/tests/local/local_test.py +0 -14
  26. fsspec/implementations/tests/memory/__init__.py +0 -0
  27. fsspec/implementations/tests/memory/memory_fixtures.py +0 -27
  28. fsspec/implementations/tests/memory/memory_test.py +0 -14
  29. fsspec/implementations/tests/out.zip +0 -0
  30. fsspec/implementations/tests/test_archive.py +0 -382
  31. fsspec/implementations/tests/test_arrow.py +0 -259
  32. fsspec/implementations/tests/test_cached.py +0 -1306
  33. fsspec/implementations/tests/test_common.py +0 -35
  34. fsspec/implementations/tests/test_dask.py +0 -29
  35. fsspec/implementations/tests/test_data.py +0 -20
  36. fsspec/implementations/tests/test_dbfs.py +0 -268
  37. fsspec/implementations/tests/test_dirfs.py +0 -588
  38. fsspec/implementations/tests/test_ftp.py +0 -178
  39. fsspec/implementations/tests/test_git.py +0 -76
  40. fsspec/implementations/tests/test_http.py +0 -577
  41. fsspec/implementations/tests/test_jupyter.py +0 -57
  42. fsspec/implementations/tests/test_libarchive.py +0 -33
  43. fsspec/implementations/tests/test_local.py +0 -1285
  44. fsspec/implementations/tests/test_memory.py +0 -382
  45. fsspec/implementations/tests/test_reference.py +0 -720
  46. fsspec/implementations/tests/test_sftp.py +0 -233
  47. fsspec/implementations/tests/test_smb.py +0 -139
  48. fsspec/implementations/tests/test_tar.py +0 -243
  49. fsspec/implementations/tests/test_webhdfs.py +0 -197
  50. fsspec/implementations/tests/test_zip.py +0 -134
  51. fsspec/tests/__init__.py +0 -0
  52. fsspec/tests/conftest.py +0 -188
  53. fsspec/tests/data/listing.html +0 -1
  54. fsspec/tests/test_api.py +0 -498
  55. fsspec/tests/test_async.py +0 -230
  56. fsspec/tests/test_caches.py +0 -255
  57. fsspec/tests/test_callbacks.py +0 -89
  58. fsspec/tests/test_compression.py +0 -164
  59. fsspec/tests/test_config.py +0 -129
  60. fsspec/tests/test_core.py +0 -466
  61. fsspec/tests/test_downstream.py +0 -40
  62. fsspec/tests/test_file.py +0 -200
  63. fsspec/tests/test_fuse.py +0 -147
  64. fsspec/tests/test_generic.py +0 -90
  65. fsspec/tests/test_gui.py +0 -23
  66. fsspec/tests/test_mapping.py +0 -228
  67. fsspec/tests/test_parquet.py +0 -140
  68. fsspec/tests/test_registry.py +0 -134
  69. fsspec/tests/test_spec.py +0 -1167
  70. fsspec/tests/test_utils.py +0 -478
  71. fsspec-2024.5.0.dist-info/RECORD +0 -111
  72. {fsspec-2024.5.0.dist-info → fsspec-2024.6.0.dist-info}/WHEEL +0 -0
  73. {fsspec-2024.5.0.dist-info → fsspec-2024.6.0.dist-info}/licenses/LICENSE +0 -0
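Of the source changes listed above, the headline addition is the new fsspec/json.py module (item 8), with matching changes in fsspec/spec.py (item 10). As a minimal sketch of what JSON round-tripping of a filesystem instance looks like, assuming only the long-standing AbstractFileSystem.to_json()/from_json() API (this release appears to factor that logic out into the new module; the exact contents of fsspec/json.py are not shown in this diff):

    import fsspec
    from fsspec.spec import AbstractFileSystem

    fs = fsspec.filesystem("memory")
    blob = fs.to_json()  # JSON string recording the protocol and constructor arguments
    fs2 = AbstractFileSystem.from_json(blob)
    assert fs2 is fs  # instance caching returns the same object for identical arguments

The only hunk reproduced below is the deletion of fsspec/implementations/tests/test_cached.py (item 32); together with the other tests/* removals and the RECORD shrinking from 111 to 55 entries, this is consistent with the test suite no longer being shipped inside the wheel.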
fsspec/implementations/tests/test_cached.py
@@ -1,1306 +0,0 @@
- import json
- import os
- import pickle
- import shutil
- import tempfile
-
- import pytest
-
- import fsspec
- from fsspec.compression import compr
- from fsspec.exceptions import BlocksizeMismatchError
- from fsspec.implementations.cache_mapper import (
-     BasenameCacheMapper,
-     HashCacheMapper,
-     create_cache_mapper,
- )
- from fsspec.implementations.cached import (
-     CachingFileSystem,
-     LocalTempFile,
-     WholeFileCacheFileSystem,
- )
- from fsspec.implementations.local import make_path_posix
- from fsspec.implementations.zip import ZipFileSystem
- from fsspec.tests.conftest import win
-
- from .test_ftp import FTPFileSystem
-
-
- @pytest.fixture
- def local_filecache():
-     import tempfile
-
-     original_location = tempfile.mkdtemp()
-     cache_location = tempfile.mkdtemp()
-     original_file = os.path.join(original_location, "afile")
-     data = b"test data"
-     with open(original_file, "wb") as f:
-         f.write(data)
-
-     # we can access the file and read it
-     fs = fsspec.filesystem(
-         "filecache", target_protocol="file", cache_storage=cache_location
-     )
-
-     return data, original_file, cache_location, fs
-
-
- def test_mapper():
-     mapper0 = create_cache_mapper(True)
-     assert mapper0("somefile") == "somefile"
-     assert mapper0("/somefile") == "somefile"
-     assert mapper0("/somedir/somefile") == "somefile"
-     assert mapper0("/otherdir/somefile") == "somefile"
-
-     mapper1 = create_cache_mapper(False)
-     assert (
-         mapper1("somefile")
-         == "dd00b9487898b02555b6a2d90a070586d63f93e80c70aaa60c992fa9e81a72fe"
-     )
-     assert (
-         mapper1("/somefile")
-         == "884c07bc2efe65c60fb9d280a620e7f180488718fb5d97736521b7f9cf5c8b37"
-     )
-     assert (
-         mapper1("/somedir/somefile")
-         == "67a6956e5a5f95231263f03758c1fd9254fdb1c564d311674cec56b0372d2056"
-     )
-     assert (
-         mapper1("/otherdir/somefile")
-         == "f043dee01ab9b752c7f2ecaeb1a5e1b2d872018e2d0a1a26c43835ebf34e7d3e"
-     )
-
-     assert mapper0 != mapper1
-     assert create_cache_mapper(True) == mapper0
-     assert create_cache_mapper(False) == mapper1
-
-     assert hash(mapper0) != hash(mapper1)
-     assert hash(create_cache_mapper(True)) == hash(mapper0)
-     assert hash(create_cache_mapper(False)) == hash(mapper1)
-
-     with pytest.raises(
-         ValueError,
-         match="BasenameCacheMapper requires zero or positive directory_levels",
-     ):
-         BasenameCacheMapper(-1)
-
-     mapper2 = BasenameCacheMapper(1)
-     assert mapper2("/somefile") == "somefile"
-     assert mapper2("/somedir/somefile") == "somedir_@_somefile"
-     assert mapper2("/otherdir/somefile") == "otherdir_@_somefile"
-     assert mapper2("/dir1/dir2/dir3/somefile") == "dir3_@_somefile"
-
-     assert mapper2 != mapper0
-     assert mapper2 != mapper1
-     assert BasenameCacheMapper(1) == mapper2
-
-     assert hash(mapper2) != hash(mapper0)
-     assert hash(mapper2) != hash(mapper1)
-     assert hash(BasenameCacheMapper(1)) == hash(mapper2)
-
-     mapper3 = BasenameCacheMapper(2)
-     assert mapper3("/somefile") == "somefile"
-     assert mapper3("/somedir/somefile") == "somedir_@_somefile"
-     assert mapper3("/otherdir/somefile") == "otherdir_@_somefile"
-     assert mapper3("/dir1/dir2/dir3/somefile") == "dir2_@_dir3_@_somefile"
-
-     assert mapper3 != mapper0
-     assert mapper3 != mapper1
-     assert mapper3 != mapper2
-     assert BasenameCacheMapper(2) == mapper3
-
-     assert hash(mapper3) != hash(mapper0)
-     assert hash(mapper3) != hash(mapper1)
-     assert hash(mapper3) != hash(mapper2)
-     assert hash(BasenameCacheMapper(2)) == hash(mapper3)
-
-
- @pytest.mark.parametrize(
-     "cache_mapper", [BasenameCacheMapper(), BasenameCacheMapper(1), HashCacheMapper()]
- )
- @pytest.mark.parametrize("force_save_pickle", [True, False])
- def test_metadata(tmpdir, cache_mapper, force_save_pickle):
-     source = os.path.join(tmpdir, "source")
-     afile = os.path.join(source, "afile")
-     os.mkdir(source)
-     open(afile, "w").write("test")
-
-     fs = fsspec.filesystem(
-         "filecache",
-         target_protocol="file",
-         cache_storage=os.path.join(tmpdir, "cache"),
-         cache_mapper=cache_mapper,
-     )
-     fs._metadata._force_save_pickle = force_save_pickle
-
-     with fs.open(afile, "rb") as f:
-         assert f.read(5) == b"test"
-
-     afile_posix = make_path_posix(afile)
-     detail = fs._metadata.cached_files[0][afile_posix]
-     assert sorted(detail.keys()) == ["blocks", "fn", "original", "time", "uid"]
-     assert isinstance(detail["blocks"], bool)
-     assert isinstance(detail["fn"], str)
-     assert isinstance(detail["time"], float)
-     assert isinstance(detail["uid"], str)
-
-     assert detail["original"] == afile_posix
-     assert detail["fn"] == fs._mapper(afile_posix)
-
-     if isinstance(cache_mapper, BasenameCacheMapper):
-         if cache_mapper.directory_levels == 0:
-             assert detail["fn"] == "afile"
-         else:
-             assert detail["fn"] == "source_@_afile"
-
-
- def test_metadata_replace_pickle_with_json(tmpdir):
-     # For backward compatibility will allow reading of old pickled metadata.
-     # When the metadata is next saved, it is in json format.
-     source = os.path.join(tmpdir, "source")
-     afile = os.path.join(source, "afile")
-     os.mkdir(source)
-     open(afile, "w").write("test")
-
-     # Save metadata in pickle format, to simulate old metadata
-     fs = fsspec.filesystem(
-         "filecache",
-         target_protocol="file",
-         cache_storage=os.path.join(tmpdir, "cache"),
-     )
-     fs._metadata._force_save_pickle = True
-     with fs.open(afile, "rb") as f:
-         assert f.read(5) == b"test"
-
-     # Confirm metadata is in pickle format
-     cache_fn = os.path.join(fs.storage[-1], "cache")
-     with open(cache_fn, "rb") as f:
-         metadata = pickle.load(f)
-     assert list(metadata.keys()) == [make_path_posix(afile)]
-
-     # Force rewrite of metadata, now in json format
-     fs._metadata._force_save_pickle = False
-     fs.pop_from_cache(afile)
-     with fs.open(afile, "rb") as f:
-         assert f.read(5) == b"test"
-
-     # Confirm metadata is in json format
-     with open(cache_fn, "r") as f:
-         metadata = json.load(f)
-     assert list(metadata.keys()) == [make_path_posix(afile)]
-
-
- def test_constructor_kwargs(tmpdir):
-     fs = fsspec.filesystem("filecache", target_protocol="file", same_names=True)
-     assert isinstance(fs._mapper, BasenameCacheMapper)
-
-     fs = fsspec.filesystem("filecache", target_protocol="file", same_names=False)
-     assert isinstance(fs._mapper, HashCacheMapper)
-
-     fs = fsspec.filesystem("filecache", target_protocol="file")
-     assert isinstance(fs._mapper, HashCacheMapper)
-
-     with pytest.raises(
-         ValueError, match="Cannot specify both same_names and cache_mapper"
-     ):
-         fs = fsspec.filesystem(
-             "filecache",
-             target_protocol="file",
-             cache_mapper=HashCacheMapper(),
-             same_names=True,
-         )
-
-
- def test_idempotent():
-     fs = CachingFileSystem("file")
-     fs2 = CachingFileSystem("file")
-     assert fs2 is fs
-     fs3 = pickle.loads(pickle.dumps(fs))
-     assert fs3.storage == fs.storage
-
-
- @pytest.mark.parametrize("force_save_pickle", [True, False])
- def test_blockcache_workflow(ftp_writable, tmp_path, force_save_pickle):
-     host, port, user, pw = ftp_writable
-     fs = FTPFileSystem(host, port, user, pw)
-     with fs.open("/out", "wb") as f:
-         f.write(b"test\n" * 4096)
-
-     fs_kwargs = {
-         "skip_instance_cache": True,
-         "cache_storage": str(tmp_path),
-         "target_protocol": "ftp",
-         "target_options": {
-             "host": host,
-             "port": port,
-             "username": user,
-             "password": pw,
-         },
-     }
-
-     # Open the blockcache and read a little bit of the data
-     fs = fsspec.filesystem("blockcache", **fs_kwargs)
-     fs._metadata._force_save_pickle = force_save_pickle
-     with fs.open("/out", "rb", block_size=5) as f:
-         assert f.read(5) == b"test\n"
-
-     # Save the cache/close it
-     fs.save_cache()
-     del fs
-
-     # Check that cache file only has the first two blocks
-     if force_save_pickle:
-         with open(tmp_path / "cache", "rb") as f:
-             cache = pickle.load(f)
-     else:
-         with open(tmp_path / "cache", "r") as f:
-             cache = json.load(f)
-     assert "/out" in cache
-     assert cache["/out"]["blocks"] == [0, 1]
-
-     # Reopen the same cache and read some more...
-     fs = fsspec.filesystem("blockcache", **fs_kwargs)
-     fs._metadata._force_save_pickle = force_save_pickle
-     with fs.open("/out", block_size=5) as f:
-         assert f.read(5) == b"test\n"
-         f.seek(30)
-         assert f.read(5) == b"test\n"
-
-
- @pytest.mark.parametrize("impl", ["filecache", "blockcache"])
- def test_workflow(ftp_writable, impl):
-     host, port, user, pw = ftp_writable
-     fs = FTPFileSystem(host, port, user, pw)
-     with fs.open("/out", "wb") as f:
-         f.write(b"test")
-     fs = fsspec.filesystem(
-         impl,
-         target_protocol="ftp",
-         target_options={"host": host, "port": port, "username": user, "password": pw},
-     )
-     assert os.listdir(fs.storage[-1]) == []
-     with fs.open("/out") as f:
-         assert os.listdir(fs.storage[-1])
-         assert f.read() == b"test"
-         assert fs._metadata.cached_files[-1]["/out"]["blocks"]
-     assert fs.cat("/out") == b"test"
-     assert fs._metadata.cached_files[-1]["/out"]["blocks"] is True
-
-     with fs.open("/out", "wb") as f:
-         f.write(b"changed")
-
-     if impl == "filecache":
-         assert (
-             fs.cat("/out") == b"changed"
-         )  # new value, because we overwrote the cached location
-
-
- @pytest.mark.parametrize("impl", ["simplecache", "blockcache"])
- def test_glob(ftp_writable, impl):
-     host, port, user, pw = ftp_writable
-     fs = FTPFileSystem(host, port, user, pw)
-     with fs.open("/out", "wb") as f:
-         f.write(b"test")
-     with fs.open("/out2", "wb") as f:
-         f.write(b"test2")
-     fs = fsspec.filesystem(
-         impl,
-         target_protocol="ftp",
-         target_options={"host": host, "port": port, "username": user, "password": pw},
-     )
-     assert fs.glob("/wrong*") == []
-     assert fs.glob("/ou*") == ["/out", "/out2"]
-
-
- def test_write():
-     tmp = str(tempfile.mkdtemp())
-     fn = tmp + "afile"
-     url = f"simplecache::file://{fn}"
-     with fsspec.open(url, "wb") as f:
-         f.write(b"hello")
-         assert fn not in f.name
-         assert not os.listdir(tmp)
-
-     assert open(fn, "rb").read() == b"hello"
-
-
- def test_clear():
-     import tempfile
-
-     origin = tempfile.mkdtemp()
-     cache1 = tempfile.mkdtemp()
-     data = b"test data"
-     f1 = os.path.join(origin, "afile")
-     with open(f1, "wb") as f:
-         f.write(data)
-
-     # populates first cache
-     fs = fsspec.filesystem("filecache", target_protocol="file", cache_storage=cache1)
-     assert fs.cat(f1) == data
-
-     assert "cache" in os.listdir(cache1)
-     assert len(os.listdir(cache1)) == 2
-     assert fs._check_file(f1)
-
-     fs.clear_cache()
-     assert not fs._check_file(f1)
-     assert len(os.listdir(cache1)) < 2
-
-
- @pytest.mark.parametrize("force_save_pickle", [True, False])
- def test_clear_expired(tmp_path, force_save_pickle):
-     def __ager(cache_fn, fn, del_fn=False):
-         """
-         Modify the cache file to virtually add time lag to selected files.
-
-         Parameters
-         ---------
-         cache_fn: str
-             cache path
-         fn: str
-             file name to be modified
-         del_fn: bool
-             whether or not to delete 'fn' from cache details
-         """
-         import pathlib
-         import time
-
-         if os.path.exists(cache_fn):
-             if force_save_pickle:
-                 with open(cache_fn, "rb") as f:
-                     cached_files = pickle.load(f)
-             else:
-                 with open(cache_fn, "r") as f:
-                     cached_files = json.load(f)
-             fn_posix = pathlib.Path(fn).as_posix()
-             cached_files[fn_posix]["time"] = cached_files[fn_posix]["time"] - 691200
-             assert os.access(cache_fn, os.W_OK), "Cache is not writable"
-             if del_fn:
-                 del cached_files[fn_posix]["fn"]
-             if force_save_pickle:
-                 with open(cache_fn, "wb") as f:
-                     pickle.dump(cached_files, f)
-             else:
-                 with open(cache_fn, "w") as f:
-                     json.dump(cached_files, f)
-         time.sleep(1)
-
-     origin = tmp_path.joinpath("origin")
-     cache1 = tmp_path.joinpath("cache1")
-     cache2 = tmp_path.joinpath("cache2")
-     cache3 = tmp_path.joinpath("cache3")
-
-     origin.mkdir()
-     cache1.mkdir()
-     cache2.mkdir()
-     cache3.mkdir()
-
-     data = b"test data"
-     f1 = origin.joinpath("afile")
-     f2 = origin.joinpath("bfile")
-     f3 = origin.joinpath("cfile")
-     f4 = origin.joinpath("dfile")
-
-     with open(f1, "wb") as f:
-         f.write(data)
-     with open(f2, "wb") as f:
-         f.write(data)
-     with open(f3, "wb") as f:
-         f.write(data)
-     with open(f4, "wb") as f:
-         f.write(data)
-
-     # populates first cache
-     fs = fsspec.filesystem(
-         "filecache", target_protocol="file", cache_storage=str(cache1), cache_check=1
-     )
-     fs._metadata._force_save_pickle = force_save_pickle
-     assert fs.cat(str(f1)) == data
-
-     # populates "last" cache if file not found in first one
-     fs = fsspec.filesystem(
-         "filecache",
-         target_protocol="file",
-         cache_storage=[str(cache1), str(cache2)],
-         cache_check=1,
-     )
-     fs._metadata._force_save_pickle = force_save_pickle
-     assert fs.cat(str(f2)) == data
-     assert fs.cat(str(f3)) == data
-     assert len(os.listdir(cache2)) == 3
-
-     # force the expiration
-     cache_fn = os.path.join(fs.storage[-1], "cache")
-     __ager(cache_fn, f2)
-
-     # remove from cache2 the expired files
-     fs.clear_expired_cache()
-     assert len(os.listdir(cache2)) == 2
-
-     # check complete cleanup
-     __ager(cache_fn, f3)
-
-     fs.clear_expired_cache()
-     assert not fs._check_file(f2)
-     assert not fs._check_file(f3)
-     assert len(os.listdir(cache2)) < 2
-
-     # check cache1 to be untouched after cleaning
-     assert len(os.listdir(cache1)) == 2
-
-     # check cleaning with 'same_name' option enabled
-     fs = fsspec.filesystem(
-         "filecache",
-         target_protocol="file",
-         cache_storage=[str(cache1), str(cache2), str(cache3)],
-         same_names=True,
-         cache_check=1,
-     )
-     fs._metadata._force_save_pickle = force_save_pickle
-     assert fs.cat(str(f4)) == data
-
-     cache_fn = os.path.join(fs.storage[-1], "cache")
-     __ager(cache_fn, f4)
-
-     fs.clear_expired_cache()
-     assert not fs._check_file(str(f4))
-
-     # check cache metadata lacking 'fn' raises RuntimeError.
-     fs = fsspec.filesystem(
-         "filecache",
-         target_protocol="file",
-         cache_storage=str(cache1),
-         same_names=True,
-         cache_check=1,
-     )
-     fs._metadata._force_save_pickle = force_save_pickle
-     assert fs.cat(str(f1)) == data
-
-     cache_fn = os.path.join(fs.storage[-1], "cache")
-     __ager(cache_fn, f1, del_fn=True)
-
-     with pytest.raises(RuntimeError, match="Cache metadata does not contain 'fn' for"):
-         fs.clear_expired_cache()
-
-
- def test_pop():
-     import tempfile
-
-     origin = tempfile.mkdtemp()
-     cache1 = tempfile.mkdtemp()
-     cache2 = tempfile.mkdtemp()
-     data = b"test data"
-     f1 = os.path.join(origin, "afile")
-     f2 = os.path.join(origin, "bfile")
-     with open(f1, "wb") as f:
-         f.write(data)
-     with open(f2, "wb") as f:
-         f.write(data)
-
-     # populates first cache
-     fs = fsspec.filesystem("filecache", target_protocol="file", cache_storage=cache1)
-     fs.cat(f1)
-
-     # populates last cache if file not found in first cache
-     fs = fsspec.filesystem(
-         "filecache", target_protocol="file", cache_storage=[cache1, cache2]
-     )
-     assert fs.cat(f2) == data
-     assert len(os.listdir(cache2)) == 2
-     assert fs._check_file(f1)
-     with pytest.raises(PermissionError):
-         fs.pop_from_cache(f1)
-     fs.pop_from_cache(f2)
-     fs.pop_from_cache(os.path.join(origin, "uncached-file"))
-     assert len(os.listdir(cache2)) == 1
-     assert not fs._check_file(f2)
-     assert fs._check_file(f1)
-
-
- def test_write_pickle_context():
-     tmp = str(tempfile.mkdtemp())
-     fn = tmp + "afile"
-     url = f"simplecache::file://{fn}"
-     with fsspec.open(url, "wb") as f:
-         pickle.loads(pickle.dumps(f))
-         f.write(b"hello ")
-         pickle.dumps(f)
-
-     with pytest.raises(ValueError):
-         pickle.dumps(f)
-
-     assert open(fn, "rb").read() == b"hello "
-
-
- def test_blocksize(ftp_writable):
-     host, port, user, pw = ftp_writable
-     fs = FTPFileSystem(host, port, user, pw)
-     with fs.open("/out_block", "wb") as f:
-         f.write(b"test" * 4000)
-
-     fs = fsspec.filesystem(
-         "blockcache",
-         target_protocol="ftp",
-         target_options={"host": host, "port": port, "username": user, "password": pw},
-     )
-
-     with fs.open("/out_block", block_size=20) as f:
-         assert f.read(1) == b"t"
-     with pytest.raises(BlocksizeMismatchError):
-         fs.open("/out_block", block_size=30)
-
-
- def test_blockcache_multiinstance(ftp_writable):
-     host, port, user, pw = ftp_writable
-     fs = FTPFileSystem(host, port, user, pw)
-     with fs.open("/one", "wb") as f:
-         f.write(b"test" * 40)
-     with fs.open("/two", "wb") as f:
-         f.write(b"test" * 40)
-     fs = fsspec.filesystem(
-         "blockcache",
-         target_protocol="ftp",
-         target_options={"host": host, "port": port, "username": user, "password": pw},
-     )
-
-     with fs.open("/one", block_size=20) as f:
-         assert f.read(1) == b"t"
-     fs2 = fsspec.filesystem(
-         "blockcache",
-         target_protocol="ftp",
-         target_options={"host": host, "port": port, "username": user, "password": pw},
-         skip_instance_cache=True,
-         cache_storage=fs.storage,
-     )
-     assert fs2._metadata.cached_files  # loaded from metadata for "one"
-     with fs2.open("/two", block_size=20) as f:
-         assert f.read(1) == b"t"
-     assert "/two" in fs2._metadata.cached_files[-1]
-     fs.save_cache()
-     assert list(fs._metadata.cached_files[-1]) == ["/one", "/two"]
-     assert list(fs2._metadata.cached_files[-1]) == ["/one", "/two"]
-
-
- def test_metadata_save_blocked(ftp_writable, caplog):
-     import logging
-
-     host, port, user, pw = ftp_writable
-     fs = FTPFileSystem(host, port, user, pw)
-     with fs.open("/one", "wb") as f:
-         f.write(b"test" * 40)
-     fs = fsspec.filesystem(
-         "blockcache",
-         target_protocol="ftp",
-         target_options={"host": host, "port": port, "username": user, "password": pw},
-     )
-
-     with fs.open("/one", block_size=20) as f:
-         assert f.read(1) == b"t"
-     fn = os.path.join(fs.storage[-1], "cache")
-     with caplog.at_level(logging.DEBUG):
-         with fs.open("/one", block_size=20) as f:
-             f.seek(21)
-             assert f.read(1)
-             os.remove(fn)
-             os.mkdir(fn)
-     assert "Cache saving failed while closing file" in caplog.text
-     os.rmdir(fn)
-
-     def open_raise(*_, **__):
-         raise NameError
-
-     try:
-         # To simulate an interpreter shutdown we temporarily set an open function in the
-         # cache_metadata module which is used on the next attempt to save metadata.
-         with caplog.at_level(logging.DEBUG):
-             with fs.open("/one", block_size=20) as f:
-                 fsspec.implementations.cache_metadata.open = open_raise
-                 f.seek(21)
-                 assert f.read(1)
-     finally:
-         fsspec.implementations.cache_metadata.__dict__.pop("open", None)
-     assert "Cache save failed due to interpreter shutdown" in caplog.text
-
-
- @pytest.mark.parametrize("impl", ["filecache", "simplecache", "blockcache"])
- def test_local_filecache_creates_dir_if_needed(impl):
-     import tempfile
-
-     original_location = tempfile.mkdtemp()
-     cache_location = tempfile.mkdtemp()
-     os.rmdir(cache_location)
-     assert not os.path.exists(cache_location)
-
-     original_file = os.path.join(original_location, "afile")
-     data = b"test data"
-     with open(original_file, "wb") as f:
-         f.write(data)
-
-     # we can access the file and read it
-     fs = fsspec.filesystem(impl, target_protocol="file", cache_storage=cache_location)
-
-     with fs.open(original_file, "rb") as f:
-         data_in_cache = f.read()
-
-     assert os.path.exists(cache_location)
-
-     assert data_in_cache == data
-
-
- @pytest.mark.parametrize("toplevel", [True, False])
- @pytest.mark.parametrize("impl", ["filecache", "simplecache", "blockcache"])
- def test_get_mapper(impl, toplevel):
-     import tempfile
-
-     original_location = tempfile.mkdtemp()
-     cache_location = tempfile.mkdtemp()
-     os.rmdir(cache_location)
-     original_file = os.path.join(original_location, "afile")
-     data = b"test data"
-     with open(original_file, "wb") as f:
-         f.write(data)
-
-     if toplevel:
-         m = fsspec.get_mapper(
-             f"{impl}::file://{original_location}",
-             **{impl: {"cache_storage": cache_location}},
-         )
-     else:
-         fs = fsspec.filesystem(
-             impl, target_protocol="file", cache_storage=cache_location
-         )
-         m = fs.get_mapper(original_location)
-
-     assert m["afile"] == data
-     assert os.listdir(cache_location)
-     assert m["afile"] == data
-
-
- def test_local_filecache_basic(local_filecache):
-     data, original_file, cache_location, fs = local_filecache
-
-     # reading from the file contains the right data
-     with fs.open(original_file, "rb") as f:
-         assert f.read() == data
-     assert "cache" in os.listdir(cache_location)
-
-     # the file in the location contains the right data
-     fn = list(fs._metadata.cached_files[-1].values())[0]["fn"]  # this is a hash value
-     assert fn in os.listdir(cache_location)
-     with open(os.path.join(cache_location, fn), "rb") as f:
-         assert f.read() == data
-
-     # still there when original file is removed (check=False)
-     os.remove(original_file)
-     with fs.open(original_file, "rb") as f:
-         assert f.read() == data
-
-
- def test_local_filecache_does_not_change_when_original_data_changed(local_filecache):
-     old_data, original_file, cache_location, fs = local_filecache
-     new_data = b"abc"
-
-     with fs.open(original_file, "rb") as f:
-         assert f.read() == old_data
-
-     with open(original_file, "wb") as f:
-         f.write(new_data)
-
-     with fs.open(original_file, "rb") as f:
-         assert f.read() == old_data
-
-
- def test_local_filecache_gets_from_original_if_cache_deleted(local_filecache):
-     old_data, original_file, cache_location, fs = local_filecache
-     new_data = b"abc"
-
-     with fs.open(original_file, "rb") as f:
-         assert f.read() == old_data
-
-     with open(original_file, "wb") as f:
-         f.write(new_data)
-
-     shutil.rmtree(cache_location)
-     assert os.path.exists(original_file)
-
-     with open(original_file, "rb") as f:
-         assert f.read() == new_data
-
-     with fs.open(original_file, "rb") as f:
-         assert f.read() == new_data
-
-     # the file in the location contains the right data
-     fn = list(fs._metadata.cached_files[-1].values())[0]["fn"]  # this is a hash value
-     assert fn in os.listdir(cache_location)
-     with open(os.path.join(cache_location, fn), "rb") as f:
-         assert f.read() == new_data
-
-
- def test_local_filecache_with_new_cache_location_makes_a_new_copy(local_filecache):
-     import tempfile
-
-     data, original_file, old_cache_location, old_fs = local_filecache
-     new_cache_location = tempfile.mkdtemp()
-
-     with old_fs.open(original_file, "rb") as f:
-         assert f.read() == data
-
-     new_fs = fsspec.filesystem(
-         "filecache", target_protocol="file", cache_storage=new_cache_location
-     )
-
-     with new_fs.open(original_file, "rb") as f:
-         assert f.read() == data
-
-     # the file in the location contains the right data
-     fn = list(new_fs._metadata.cached_files[-1].values())[0][
-         "fn"
-     ]  # this is a hash value
-     assert fn in os.listdir(old_cache_location)
-     assert fn in os.listdir(new_cache_location)
-
-     with open(os.path.join(new_cache_location, fn), "rb") as f:
-         assert f.read() == data
-
-
- def test_filecache_multicache():
-     import tempfile
-
-     origin = tempfile.mkdtemp()
-     cache1 = tempfile.mkdtemp()
-     cache2 = tempfile.mkdtemp()
-     data = b"test data"
-     f1 = os.path.join(origin, "afile")
-     f2 = os.path.join(origin, "bfile")
-     with open(f1, "wb") as f:
-         f.write(data)
-     with open(f2, "wb") as f:
-         f.write(data * 2)
-
-     # populates first cache
-     fs = fsspec.filesystem("filecache", target_protocol="file", cache_storage=cache1)
-     assert fs.cat(f1) == data
-
-     assert len(os.listdir(cache1)) == 2  # cache and hashed afile
-     assert len(os.listdir(cache2)) == 0  # hasn't been initialized yet
-
-     # populates last cache if file not found in first cache
-     fs = fsspec.filesystem(
-         "filecache", target_protocol="file", cache_storage=[cache1, cache2]
-     )
-
-     assert fs.cat(f1) == data
-     assert fs.cat(f2) == data * 2
-
-     assert "cache" in os.listdir(cache1)
-     assert "cache" in os.listdir(cache2)
-
-     cache1_contents = [f for f in os.listdir(cache1) if f != "cache"]
-     assert len(cache1_contents) == 1
-
-     with open(os.path.join(cache1, cache1_contents[0]), "rb") as f:
-         assert f.read() == data
-
-     cache2_contents = [f for f in os.listdir(cache2) if f != "cache"]
-     assert len(cache2_contents) == 1
-
-     with open(os.path.join(cache2, cache2_contents[0]), "rb") as f:
-         assert f.read() == data * 2
-
-
- @pytest.mark.parametrize("impl", ["filecache", "simplecache"])
- def test_filecache_multicache_with_same_file_different_data_reads_from_first(impl):
-     import tempfile
-
-     origin = tempfile.mkdtemp()
-     cache1 = tempfile.mkdtemp()
-     cache2 = tempfile.mkdtemp()
-     data = b"test data"
-     f1 = os.path.join(origin, "afile")
-     with open(f1, "wb") as f:
-         f.write(data)
-
-     # populate first cache
-     fs1 = fsspec.filesystem(impl, target_protocol="file", cache_storage=cache1)
-     assert fs1.cat(f1) == data
-
-     with open(f1, "wb") as f:
-         f.write(data * 2)
-
-     # populate second cache
-     fs2 = fsspec.filesystem(impl, target_protocol="file", cache_storage=cache2)
-
-     assert fs2.cat(f1) == data * 2
-
-     # the filenames in each cache are the same, but the data is different
-     assert sorted(os.listdir(cache1)) == sorted(os.listdir(cache2))
-
-     fs = fsspec.filesystem(impl, target_protocol="file", cache_storage=[cache1, cache2])
-
-     assert fs.cat(f1) == data
-
-
- def test_filecache_with_checks():
-     import time
-
-     origin = tempfile.mkdtemp()
-     cache1 = tempfile.mkdtemp()
-     data = b"test data"
-     f1 = os.path.join(origin, "afile")
-     with open(f1, "wb") as f:
-         f.write(data)
-
-     # populate first cache
-     fs = fsspec.filesystem(
-         "filecache", target_protocol="file", cache_storage=cache1, expiry_time=0.1
-     )
-     fs2 = fsspec.filesystem(
-         "filecache", target_protocol="file", cache_storage=cache1, check_files=True
-     )
-     assert fs.cat(f1) == data
-     assert fs2.cat(f1) == data
-
-     with open(f1, "wb") as f:
-         f.write(data * 2)
-
-     assert fs.cat(f1) == data  # does not change
-     assert fs2.cat(f1) == data * 2  # changed, since origin changed
-     with fs2.open(f1) as f:
-         assert f.read() == data * 2  # read also sees new data
-     time.sleep(0.11)  # allow cache details to expire
-     assert fs.cat(f1) == data * 2  # changed, since origin changed
-
-
- @pytest.mark.parametrize("impl", ["filecache", "simplecache", "blockcache"])
- @pytest.mark.parametrize("fs", ["local", "multi"], indirect=["fs"])
- def test_takes_fs_instance(impl, fs):
-     origin = tempfile.mkdtemp()
-     data = b"test data"
-     f1 = os.path.join(origin, "afile")
-     with open(f1, "wb") as f:
-         f.write(data)
-
-     fs2 = fsspec.filesystem(impl, fs=fs)
-
-     assert fs2.cat(f1) == data
-
-
- def test_add_file_to_cache_after_save(local_filecache):
-     (data, original_file, cache_location, fs) = local_filecache
-
-     fs.save_cache()
-
-     fs.cat(original_file)
-     assert len(fs._metadata.cached_files[-1]) == 1
-
-     fs.save_cache()
-
-     fs2 = fsspec.filesystem(
-         "filecache",
-         target_protocol="file",
-         cache_storage=cache_location,
-         do_not_use_cache_for_this_instance=True,  # cache is masking the issue
-     )
-     assert len(fs2._metadata.cached_files[-1]) == 1
-
-
- def test_cached_open_close_read(ftp_writable):
-     # Regression test for <https://github.com/fsspec/filesystem_spec/issues/799>
-     host, port, user, pw = ftp_writable
-     fs = FTPFileSystem(host, port, user, pw)
-     with fs.open("/out_block", "wb") as f:
-         f.write(b"test" * 4000)
-     fs = fsspec.filesystem(
-         "cached",
-         target_protocol="ftp",
-         target_options={"host": host, "port": port, "username": user, "password": pw},
-     )
-     with fs.open("/out_block", block_size=1024) as f:
-         pass
-     with fs.open("/out_block", block_size=1024) as f:
-         assert f.read(1) == b"t"
-     # Regression test for <https://github.com/fsspec/filesystem_spec/issues/845>
-     assert fs._metadata.cached_files[-1]["/out_block"]["blocks"] == {0}
-
-
- @pytest.mark.parametrize("impl", ["filecache", "simplecache"])
- @pytest.mark.parametrize("compression", ["gzip", "bz2"])
- def test_with_compression(impl, compression):
-     data = b"123456789"
-     tempdir = tempfile.mkdtemp()
-     cachedir = tempfile.mkdtemp()
-     fn = os.path.join(tempdir, "data")
-     f = compr[compression](open(fn, mode="wb"), mode="w")
-     f.write(data)
-     f.close()
-
-     with fsspec.open(
-         f"{impl}::{fn}",
-         "rb",
-         compression=compression,
-         **{impl: {"same_names": True, "cache_storage": cachedir}},
-     ) as f:
-         # stores original compressed file, uncompress on read
-         assert f.read() == data
-         assert "data" in os.listdir(cachedir)
-         assert open(os.path.join(cachedir, "data"), "rb").read() != data
-
-     cachedir = tempfile.mkdtemp()
-
-     with fsspec.open(
-         f"{impl}::{fn}",
-         "rb",
-         **{
-             impl: {
-                 "same_names": True,
-                 "compression": compression,
-                 "cache_storage": cachedir,
-             }
-         },
-     ) as f:
-         # stores uncompressed data
-         assert f.read() == data
-         assert "data" in os.listdir(cachedir)
-         assert open(os.path.join(cachedir, "data"), "rb").read() == data
-
-
- @pytest.mark.parametrize("protocol", ["simplecache", "filecache"])
- def test_again(protocol):
-     fn = "memory://afile"
-     with fsspec.open(fn, "wb") as f:
-         f.write(b"hello")
-     d2 = tempfile.mkdtemp()
-     lurl = fsspec.open_local(f"{protocol}::{fn}", **{protocol: {"cache_storage": d2}})
-     assert os.path.exists(lurl)
-     assert d2 in lurl
-     assert open(lurl, "rb").read() == b"hello"
-
-     # remove cache dir
-     shutil.rmtree(d2)
-     assert not os.path.exists(lurl)
-
-     # gets recreated
-     lurl = fsspec.open_local(f"{protocol}::{fn}", **{protocol: {"cache_storage": d2}})
-     assert open(lurl, "rb").read() == b"hello"
-
-
- @pytest.mark.parametrize("protocol", ["simplecache", "filecache"])
- def test_multi_cache(protocol):
-     with fsspec.open_files("memory://file*", "wb", num=2) as files:
-         for f in files:
-             f.write(b"hello")
-
-     d2 = tempfile.mkdtemp()
-     lurl = fsspec.open_local(
-         f"{protocol}::memory://file*",
-         mode="rb",
-         **{protocol: {"cache_storage": d2, "same_names": True}},
-     )
-     assert all(d2 in u for u in lurl)
-     assert all(os.path.basename(f) in ["file0", "file1"] for f in lurl)
-     assert all(open(u, "rb").read() == b"hello" for u in lurl)
-
-     d2 = tempfile.mkdtemp()
-     lurl = fsspec.open_files(
-         f"{protocol}::memory://file*",
-         mode="rb",
-         **{protocol: {"cache_storage": d2, "same_names": True}},
-     )
-     with lurl as files:
-         for f in files:
-             assert os.path.basename(f.name) in ["file0", "file1"]
-             assert f.read() == b"hello"
-     fs = fsspec.filesystem("memory")
-     fs.store.clear()
-     with lurl as files:
-         for f in files:
-             assert os.path.basename(f.name) in ["file0", "file1"]
-             assert f.read() == b"hello"
-
-
- @pytest.mark.parametrize("protocol", ["simplecache", "filecache", "blockcache"])
- def test_multi_cat(protocol, ftp_writable):
-     host, port, user, pw = ftp_writable
-     fs = FTPFileSystem(host, port, user, pw)
-     for fn in ("/file0", "/file1"):
-         with fs.open(fn, "wb") as f:
-             f.write(b"hello")
-
-     d2 = tempfile.mkdtemp()
-     fs = fsspec.filesystem(protocol, storage=d2, fs=fs)
-     assert fs.cat("file*") == {"/file0": b"hello", "/file1": b"hello"}
-
-
- @pytest.mark.parametrize("protocol", ["simplecache", "filecache"])
- def test_multi_cache_chain(protocol):
-     import zipfile
-
-     d = tempfile.mkdtemp()
-     fn = os.path.join(d, "test.zip")
-     zipfile.ZipFile(fn, mode="w").open("test", "w").write(b"hello")
-
-     with fsspec.open_files(f"zip://test::{protocol}::file://{fn}") as files:
-         assert d not in files[0]._fileobj._file.name
-         assert files[0].read() == b"hello"
-
-     # special test contains "file:" string
-     fn = os.path.join(d, "file.zip")
-     zipfile.ZipFile(fn, mode="w").open("file", "w").write(b"hello")
-     with fsspec.open_files(f"zip://file::{protocol}::file://{fn}") as files:
-         assert d not in files[0]._fileobj._file.name
-         assert files[0].read() == b"hello"
-
-
- @pytest.mark.parametrize("protocol", ["blockcache", "simplecache", "filecache"])
- def test_strip(protocol):
-     fs = fsspec.filesystem(protocol, target_protocol="memory")
-     url1 = "memory://afile"
-     assert fs._strip_protocol(url1) == "/afile"
-     assert fs._strip_protocol(protocol + "://afile") == "/afile"
-     assert fs._strip_protocol(protocol + "::memory://afile") == "/afile"
-
-
- @pytest.mark.parametrize("protocol", ["simplecache", "filecache"])
- def test_cached_write(protocol):
-     d = tempfile.mkdtemp()
-     ofs = fsspec.open_files(f"{protocol}::file://{d}/*.out", mode="wb", num=2)
-     with ofs as files:
-         for f in files:
-             assert isinstance(f, LocalTempFile)
-             f.write(b"data")
-             fn = f.name
-
-     assert sorted(os.listdir(d)) == ["0.out", "1.out"]
-     assert not os.path.exists(fn)
-
-
- def test_expiry():
-     import time
-
-     d = tempfile.mkdtemp()
-     fs = fsspec.filesystem("memory")
-     fn = "/afile"
-     fn0 = "memory://afile"
-     data = b"hello"
-     with fs.open(fn0, "wb") as f:
-         f.write(data)
-
-     fs = fsspec.filesystem(
-         "filecache",
-         fs=fs,
-         cache_storage=d,
-         check_files=False,
-         expiry_time=0.1,
-         same_names=True,
-     )
-
-     # get file
-     assert fs._check_file(fn0) is False
-     assert fs.open(fn0, mode="rb").read() == data
-     start_time = fs._metadata.cached_files[-1][fn]["time"]
-
-     # cache time..
-     assert fs.last_cache - start_time < 0.19
-
-     # cache should have refreshed
-     time.sleep(0.01)
-
-     # file should still be valid... re-read
-     assert fs.open(fn0, mode="rb").read() == data
-     detail, _ = fs._check_file(fn0)
-     assert detail["time"] == start_time
-
-     time.sleep(0.11)
-     # file should still be invalid... re-read
-     assert fs._check_file(fn0) is False
-     assert fs.open(fn0, mode="rb").read() == data
-     detail, _ = fs._check_file(fn0)
-     assert detail["time"] - start_time > 0.09
-
-
- def test_equality(tmpdir):
-     """Test sane behaviour for equality and hashing.
-
-     Make sure that different CachingFileSystem only test equal to each other
-     when they should, and do not test equal to the filesystem they rely upon.
-     Similarly, make sure their hashes differ when they should and are equal
-     when they should not.
-
-     Related: GitHub#577, GitHub#578
-     """
-     from fsspec.implementations.local import LocalFileSystem
-
-     lfs = LocalFileSystem()
-     dir1 = f"{tmpdir}/raspberry"
-     dir2 = f"{tmpdir}/banana"
-     cfs1 = CachingFileSystem(fs=lfs, cache_storage=dir1)
-     cfs2 = CachingFileSystem(fs=lfs, cache_storage=dir2)
-     cfs3 = CachingFileSystem(fs=lfs, cache_storage=dir2)
-     assert cfs1 == cfs1
-     assert cfs1 != cfs2
-     assert cfs1 != cfs3
-     assert cfs2 == cfs3
-     assert cfs1 != lfs
-     assert cfs2 != lfs
-     assert cfs3 != lfs
-     assert hash(lfs) != hash(cfs1)
-     assert hash(lfs) != hash(cfs2)
-     assert hash(lfs) != hash(cfs3)
-     assert hash(cfs1) != hash(cfs2)
-     assert hash(cfs1) != hash(cfs2)
-     assert hash(cfs2) == hash(cfs3)
-
-
- def test_str():
-     """Test that the str representation refers to correct class."""
-     from fsspec.implementations.local import LocalFileSystem
-
-     lfs = LocalFileSystem()
-     cfs = CachingFileSystem(fs=lfs)
-     assert "CachingFileSystem" in str(cfs)
-
-
- def test_getitems_errors(tmpdir):
-     tmpdir = str(tmpdir)
-     os.makedirs(os.path.join(tmpdir, "afolder"))
-     open(os.path.join(tmpdir, "afile"), "w").write("test")
-     open(os.path.join(tmpdir, "afolder", "anotherfile"), "w").write("test2")
-     m = fsspec.get_mapper(f"file://{tmpdir}")
-     assert m.getitems(["afile", "bfile"], on_error="omit") == {"afile": b"test"}
-
-     # my code
-     m2 = fsspec.get_mapper(f"simplecache::file://{tmpdir}")
-     assert m2.getitems(["afile"], on_error="omit") == {"afile": b"test"}  # works
-     assert m2.getitems(["afile", "bfile"], on_error="omit") == {
-         "afile": b"test"
-     }  # throws KeyError
-
-     with pytest.raises(KeyError):
-         m.getitems(["afile", "bfile"])
-     out = m.getitems(["afile", "bfile"], on_error="return")
-     assert isinstance(out["bfile"], KeyError)
-     m = fsspec.get_mapper(f"file://{tmpdir}", missing_exceptions=())
-     assert m.getitems(["afile", "bfile"], on_error="omit") == {"afile": b"test"}
-     with pytest.raises(FileNotFoundError):
-         m.getitems(["afile", "bfile"])
-
-
- @pytest.mark.parametrize("temp_cache", [False, True])
- def test_cache_dir_auto_deleted(temp_cache, tmpdir):
-     import gc
-
-     source = os.path.join(tmpdir, "source")
-     afile = os.path.join(source, "afile")
-     os.mkdir(source)
-     open(afile, "w").write("test")
-
-     fs = fsspec.filesystem(
-         "filecache",
-         target_protocol="file",
-         cache_storage="TMP" if temp_cache else os.path.join(tmpdir, "cache"),
-         skip_instance_cache=True,  # Important to avoid fs itself being cached
-     )
-
-     cache_dir = fs.storage[-1]
-
-     # Force cache to be created
-     with fs.open(afile, "rb") as f:
-         assert f.read(5) == b"test"
-
-     # Confirm cache exists
-     local = fsspec.filesystem("file")
-     assert local.exists(cache_dir)
-
-     # Delete file system
-     del fs
-     gc.collect()
-
-     # Ensure cache has been deleted, if it is temporary
-     if temp_cache:
-         assert not local.exists(cache_dir)
-     else:
-         assert local.exists(cache_dir)
-
-
- @pytest.mark.parametrize("protocol", ["filecache", "blockcache", "simplecache"])
- def test_cache_size(tmpdir, protocol):
-     if win and protocol == "blockcache":
-         pytest.skip("Windows file locking affects blockcache size tests")
-
-     source = os.path.join(tmpdir, "source")
-     afile = os.path.join(source, "afile")
-     os.mkdir(source)
-     open(afile, "w").write("test")
-
-     fs = fsspec.filesystem(protocol, target_protocol="file")
-     empty_cache_size = fs.cache_size()
-
-     # Create cache
-     with fs.open(afile, "rb") as f:
-         assert f.read(5) == b"test"
-     single_file_cache_size = fs.cache_size()
-     assert single_file_cache_size > empty_cache_size
-
-     # Remove cached file but leave cache metadata file
-     fs.pop_from_cache(afile)
-     if win and protocol == "filecache":
-         empty_cache_size < fs.cache_size()
-     elif protocol != "simplecache":
-         assert empty_cache_size < fs.cache_size() < single_file_cache_size
-     else:
-         # simplecache never stores metadata
-         assert fs.cache_size() == single_file_cache_size
-
-     # Completely remove cache
-     fs.clear_cache()
-     if protocol != "simplecache":
-         assert fs.cache_size() == empty_cache_size
-     else:
-         # Whole cache directory has been deleted
-         assert fs.cache_size() == 0
-
-
- def test_spurious_directory_issue1410(tmpdir):
-     import zipfile
-
-     os.chdir(tmpdir)
-     zipfile.ZipFile("dir.zip", mode="w").open("file.txt", "w").write(b"hello")
-     fs = WholeFileCacheFileSystem(fs=ZipFileSystem("dir.zip"))
-
-     assert len(os.listdir()) == 1
-     with fs.open("/file.txt", "rb"):
-         pass
-
-     # There was a bug reported in issue #1410 in which a directory
-     # would be created and the next assertion would fail.
-     assert len(os.listdir()) == 1
-     assert fs._parent("/any/path") == "any"  # correct for ZIP, which has no leading /
-
-
- def test_write_transaction(tmpdir, m, monkeypatch):
-     called = [0]
-     orig = m.put
-
-     def patched_put(*args, **kwargs):
-         called[0] += 1
-         orig(*args, **kwargs)
-
-     monkeypatch.setattr(m, "put", patched_put)
-     tmpdir = str(tmpdir)
-     fs, _ = fsspec.core.url_to_fs("simplecache::memory://", cache_storage=tmpdir)
-     with fs.transaction:
-         fs.pipe("myfile", b"1")
-         fs.pipe("otherfile", b"2")
-         fs.pipe("deep/dir/otherfile", b"3")
-         with fs.open("blarh", "wb") as f:
-             f.write(b"ff")
-         assert not m.find("")
-
-     assert fs.info("otherfile")["size"] == 1
-     assert fs.info("deep")["type"] == "directory"
-     assert fs.isdir("deep")
-     assert fs.ls("deep", detail=False) == ["/deep/dir"]
-
-     assert m.cat("myfile") == b"1"
-     assert m.cat("otherfile") == b"2"
-     assert called[0] == 1  # copy was done in one go