fsspec 2024.5.0__py3-none-any.whl → 2024.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/_version.py +2 -2
- fsspec/caching.py +3 -2
- fsspec/compression.py +1 -1
- fsspec/implementations/cached.py +1 -13
- fsspec/implementations/github.py +12 -0
- fsspec/implementations/reference.py +6 -0
- fsspec/implementations/smb.py +10 -0
- fsspec/json.py +81 -0
- fsspec/registry.py +24 -18
- fsspec/spec.py +76 -34
- fsspec/utils.py +1 -1
- {fsspec-2024.5.0.dist-info → fsspec-2024.6.0.dist-info}/METADATA +11 -5
- fsspec-2024.6.0.dist-info/RECORD +55 -0
- fsspec/implementations/tests/__init__.py +0 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +0 -112
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +0 -582
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +0 -873
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +0 -458
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +0 -1355
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +0 -795
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +0 -613
- fsspec/implementations/tests/conftest.py +0 -39
- fsspec/implementations/tests/local/__init__.py +0 -0
- fsspec/implementations/tests/local/local_fixtures.py +0 -18
- fsspec/implementations/tests/local/local_test.py +0 -14
- fsspec/implementations/tests/memory/__init__.py +0 -0
- fsspec/implementations/tests/memory/memory_fixtures.py +0 -27
- fsspec/implementations/tests/memory/memory_test.py +0 -14
- fsspec/implementations/tests/out.zip +0 -0
- fsspec/implementations/tests/test_archive.py +0 -382
- fsspec/implementations/tests/test_arrow.py +0 -259
- fsspec/implementations/tests/test_cached.py +0 -1306
- fsspec/implementations/tests/test_common.py +0 -35
- fsspec/implementations/tests/test_dask.py +0 -29
- fsspec/implementations/tests/test_data.py +0 -20
- fsspec/implementations/tests/test_dbfs.py +0 -268
- fsspec/implementations/tests/test_dirfs.py +0 -588
- fsspec/implementations/tests/test_ftp.py +0 -178
- fsspec/implementations/tests/test_git.py +0 -76
- fsspec/implementations/tests/test_http.py +0 -577
- fsspec/implementations/tests/test_jupyter.py +0 -57
- fsspec/implementations/tests/test_libarchive.py +0 -33
- fsspec/implementations/tests/test_local.py +0 -1285
- fsspec/implementations/tests/test_memory.py +0 -382
- fsspec/implementations/tests/test_reference.py +0 -720
- fsspec/implementations/tests/test_sftp.py +0 -233
- fsspec/implementations/tests/test_smb.py +0 -139
- fsspec/implementations/tests/test_tar.py +0 -243
- fsspec/implementations/tests/test_webhdfs.py +0 -197
- fsspec/implementations/tests/test_zip.py +0 -134
- fsspec/tests/__init__.py +0 -0
- fsspec/tests/conftest.py +0 -188
- fsspec/tests/data/listing.html +0 -1
- fsspec/tests/test_api.py +0 -498
- fsspec/tests/test_async.py +0 -230
- fsspec/tests/test_caches.py +0 -255
- fsspec/tests/test_callbacks.py +0 -89
- fsspec/tests/test_compression.py +0 -164
- fsspec/tests/test_config.py +0 -129
- fsspec/tests/test_core.py +0 -466
- fsspec/tests/test_downstream.py +0 -40
- fsspec/tests/test_file.py +0 -200
- fsspec/tests/test_fuse.py +0 -147
- fsspec/tests/test_generic.py +0 -90
- fsspec/tests/test_gui.py +0 -23
- fsspec/tests/test_mapping.py +0 -228
- fsspec/tests/test_parquet.py +0 -140
- fsspec/tests/test_registry.py +0 -134
- fsspec/tests/test_spec.py +0 -1167
- fsspec/tests/test_utils.py +0 -478
- fsspec-2024.5.0.dist-info/RECORD +0 -111
- {fsspec-2024.5.0.dist-info → fsspec-2024.6.0.dist-info}/WHEEL +0 -0
- {fsspec-2024.5.0.dist-info → fsspec-2024.6.0.dist-info}/licenses/LICENSE +0 -0
fsspec/tests/test_fuse.py
DELETED
|
@@ -1,147 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import subprocess
|
|
3
|
-
import time
|
|
4
|
-
from multiprocessing import Process
|
|
5
|
-
|
|
6
|
-
import pytest
|
|
7
|
-
|
|
8
|
-
try:
|
|
9
|
-
pytest.importorskip("fuse") # noqa: E402
|
|
10
|
-
except OSError:
|
|
11
|
-
# can succeed in importing fuse, but fail to load so
|
|
12
|
-
pytest.importorskip("nonexistent") # noqa: E402
|
|
13
|
-
|
|
14
|
-
from fsspec.fuse import main, run
|
|
15
|
-
from fsspec.implementations.memory import MemoryFileSystem
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def host_fuse(mountdir):
|
|
19
|
-
fs = MemoryFileSystem()
|
|
20
|
-
fs.touch("/mounted/testfile")
|
|
21
|
-
run(fs, "/mounted/", mountdir)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def test_basic(tmpdir, capfd):
|
|
25
|
-
mountdir = str(tmpdir.mkdir("mount"))
|
|
26
|
-
|
|
27
|
-
fuse_process = Process(target=host_fuse, args=(str(mountdir),))
|
|
28
|
-
fuse_process.start()
|
|
29
|
-
|
|
30
|
-
try:
|
|
31
|
-
timeout = 10
|
|
32
|
-
while True:
|
|
33
|
-
try:
|
|
34
|
-
# can fail with device not ready while waiting for fuse
|
|
35
|
-
if "testfile" in os.listdir(mountdir):
|
|
36
|
-
break
|
|
37
|
-
except Exception:
|
|
38
|
-
pass
|
|
39
|
-
timeout -= 1
|
|
40
|
-
time.sleep(1)
|
|
41
|
-
if not timeout > 0:
|
|
42
|
-
import pdb
|
|
43
|
-
|
|
44
|
-
pdb.set_trace()
|
|
45
|
-
pytest.skip(msg="fuse didn't come live")
|
|
46
|
-
|
|
47
|
-
fn = os.path.join(mountdir, "test")
|
|
48
|
-
with open(fn, "wb") as f:
|
|
49
|
-
f.write(b"data")
|
|
50
|
-
|
|
51
|
-
with open(fn) as f:
|
|
52
|
-
assert f.read() == "data"
|
|
53
|
-
|
|
54
|
-
os.remove(fn)
|
|
55
|
-
|
|
56
|
-
os.mkdir(fn)
|
|
57
|
-
assert os.listdir(fn) == []
|
|
58
|
-
|
|
59
|
-
os.mkdir(fn + "/inner")
|
|
60
|
-
|
|
61
|
-
with pytest.raises(OSError):
|
|
62
|
-
os.rmdir(fn)
|
|
63
|
-
|
|
64
|
-
captured = capfd.readouterr()
|
|
65
|
-
assert "Traceback" not in captured.out
|
|
66
|
-
assert "Traceback" not in captured.err
|
|
67
|
-
|
|
68
|
-
os.rmdir(fn + "/inner")
|
|
69
|
-
os.rmdir(fn)
|
|
70
|
-
finally:
|
|
71
|
-
fuse_process.terminate()
|
|
72
|
-
fuse_process.join(timeout=10)
|
|
73
|
-
if fuse_process.is_alive():
|
|
74
|
-
fuse_process.kill()
|
|
75
|
-
fuse_process.join()
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def host_mount_local(source_dir, mount_dir, debug_log):
|
|
79
|
-
main(["local", source_dir, mount_dir, "-l", debug_log, "--ready-file"])
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
@pytest.fixture()
|
|
83
|
-
def mount_local(tmpdir):
|
|
84
|
-
source_dir = tmpdir.mkdir("source")
|
|
85
|
-
mount_dir = tmpdir.mkdir("local")
|
|
86
|
-
debug_log = tmpdir / "debug.log"
|
|
87
|
-
fuse_process = Process(
|
|
88
|
-
target=host_mount_local, args=(str(source_dir), str(mount_dir), str(debug_log))
|
|
89
|
-
)
|
|
90
|
-
fuse_process.start()
|
|
91
|
-
ready_file = mount_dir / ".fuse_ready"
|
|
92
|
-
for _ in range(20):
|
|
93
|
-
if ready_file.exists() and open(ready_file).read() == b"ready":
|
|
94
|
-
break
|
|
95
|
-
time.sleep(0.1)
|
|
96
|
-
try:
|
|
97
|
-
yield (source_dir, mount_dir)
|
|
98
|
-
finally:
|
|
99
|
-
fuse_process.terminate()
|
|
100
|
-
fuse_process.join(timeout=10)
|
|
101
|
-
if fuse_process.is_alive():
|
|
102
|
-
fuse_process.kill()
|
|
103
|
-
fuse_process.join()
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def test_mount(mount_local):
|
|
107
|
-
source_dir, mount_dir = mount_local
|
|
108
|
-
assert os.listdir(mount_dir) == []
|
|
109
|
-
assert os.listdir(source_dir) == []
|
|
110
|
-
|
|
111
|
-
mount_dir.mkdir("a")
|
|
112
|
-
|
|
113
|
-
assert os.listdir(mount_dir) == ["a"]
|
|
114
|
-
assert os.listdir(source_dir) == ["a"]
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def test_chmod(mount_local):
|
|
118
|
-
source_dir, mount_dir = mount_local
|
|
119
|
-
open(mount_dir / "text", "w").write("test")
|
|
120
|
-
assert os.listdir(source_dir) == ["text"]
|
|
121
|
-
|
|
122
|
-
cp = subprocess.run(
|
|
123
|
-
["cp", str(mount_dir / "text"), str(mount_dir / "new")],
|
|
124
|
-
stdout=subprocess.PIPE,
|
|
125
|
-
stderr=subprocess.PIPE,
|
|
126
|
-
check=False,
|
|
127
|
-
)
|
|
128
|
-
|
|
129
|
-
assert cp.stderr == b""
|
|
130
|
-
assert cp.stdout == b""
|
|
131
|
-
assert set(os.listdir(source_dir)) == {"text", "new"}
|
|
132
|
-
assert open(mount_dir / "new").read() == "test"
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
def test_seek_rw(mount_local):
|
|
136
|
-
source_dir, mount_dir = mount_local
|
|
137
|
-
fh = open(mount_dir / "text", "w")
|
|
138
|
-
fh.write("teST")
|
|
139
|
-
fh.seek(2)
|
|
140
|
-
fh.write("st")
|
|
141
|
-
fh.close()
|
|
142
|
-
|
|
143
|
-
fh = open(mount_dir / "text", "r")
|
|
144
|
-
assert fh.read() == "test"
|
|
145
|
-
fh.seek(2)
|
|
146
|
-
assert fh.read() == "st"
|
|
147
|
-
fh.close()
|
fsspec/tests/test_generic.py
DELETED
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
import fsspec
|
|
4
|
-
from fsspec.tests.conftest import data, server # noqa: F401
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def test_remote_async_ops(server):
|
|
8
|
-
fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true"})
|
|
9
|
-
fs = fsspec.filesystem("generic", default_method="current")
|
|
10
|
-
out = fs.info(server + "/index/realfile")
|
|
11
|
-
assert out["size"] == len(data)
|
|
12
|
-
assert out["type"] == "file"
|
|
13
|
-
assert fs.isfile(server + "/index/realfile") # this method from superclass
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def test_touch_rm(m):
|
|
17
|
-
m.touch("afile")
|
|
18
|
-
m.touch("dir/afile")
|
|
19
|
-
|
|
20
|
-
fs = fsspec.filesystem("generic", default_method="current")
|
|
21
|
-
fs.rm("memory://afile")
|
|
22
|
-
assert not m.exists("afile")
|
|
23
|
-
|
|
24
|
-
fs.rm("memory://dir", recursive=True)
|
|
25
|
-
assert not m.exists("dir/afile")
|
|
26
|
-
assert not m.exists("dir")
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def test_cp_async_to_sync(server, m):
|
|
30
|
-
fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true"})
|
|
31
|
-
fs = fsspec.filesystem("generic", default_method="current")
|
|
32
|
-
fs.cp([server + "/index/realfile"], ["memory://realfile"])
|
|
33
|
-
assert m.cat("realfile") == data
|
|
34
|
-
|
|
35
|
-
fs.rm("memory://realfile")
|
|
36
|
-
assert not m.exists("realfile")
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def test_pipe_cat_sync(m):
|
|
40
|
-
fs = fsspec.filesystem("generic", default_method="current")
|
|
41
|
-
fs.pipe("memory://afile", b"data")
|
|
42
|
-
assert fs.cat("memory://afile") == b"data"
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def test_cat_async(server):
|
|
46
|
-
fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true"})
|
|
47
|
-
fs = fsspec.filesystem("generic", default_method="current")
|
|
48
|
-
assert fs.cat(server + "/index/realfile") == data
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def test_rsync(tmpdir, m):
|
|
52
|
-
from fsspec.generic import GenericFileSystem, rsync
|
|
53
|
-
|
|
54
|
-
fs = GenericFileSystem()
|
|
55
|
-
fs.pipe("memory:///deep/path/afile", b"data1")
|
|
56
|
-
fs.pipe("memory:///deep/afile", b"data2")
|
|
57
|
-
|
|
58
|
-
with pytest.raises(ValueError):
|
|
59
|
-
rsync("memory:///deep/afile", f"file://{tmpdir}")
|
|
60
|
-
rsync("memory://", f"file://{tmpdir}")
|
|
61
|
-
|
|
62
|
-
allfiles = fs.find(f"file://{tmpdir}", withdirs=True, detail=True)
|
|
63
|
-
pos_tmpdir = fsspec.implementations.local.make_path_posix(str(tmpdir)) # for WIN
|
|
64
|
-
assert set(allfiles) == {
|
|
65
|
-
f"file://{pos_tmpdir}{_}"
|
|
66
|
-
for _ in [
|
|
67
|
-
"",
|
|
68
|
-
"/deep",
|
|
69
|
-
"/deep/path",
|
|
70
|
-
"/deep/path/afile",
|
|
71
|
-
"/deep/afile",
|
|
72
|
-
]
|
|
73
|
-
}
|
|
74
|
-
fs.rm("memory:///deep/afile")
|
|
75
|
-
rsync("memory://", f"file://{tmpdir}", delete_missing=True)
|
|
76
|
-
allfiles2 = fs.find(f"file://{tmpdir}", withdirs=True, detail=True)
|
|
77
|
-
assert set(allfiles2) == {
|
|
78
|
-
f"file://{pos_tmpdir}{_}"
|
|
79
|
-
for _ in [
|
|
80
|
-
"",
|
|
81
|
-
"/deep",
|
|
82
|
-
"/deep/path",
|
|
83
|
-
"/deep/path/afile",
|
|
84
|
-
]
|
|
85
|
-
}
|
|
86
|
-
# the file was not updated, since size was correct
|
|
87
|
-
assert (
|
|
88
|
-
allfiles[f"file://{pos_tmpdir}/deep/path/afile"]
|
|
89
|
-
== allfiles2[f"file://{pos_tmpdir}/deep/path/afile"]
|
|
90
|
-
)
|
fsspec/tests/test_gui.py
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
panel = pytest.importorskip("panel")
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def test_basic():
|
|
7
|
-
import fsspec.gui
|
|
8
|
-
|
|
9
|
-
gui = fsspec.gui.FileSelector()
|
|
10
|
-
assert "url" in str(gui.panel)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def test_kwargs(tmpdir):
|
|
14
|
-
"""confirm kwargs are passed to the filesystem instance"""
|
|
15
|
-
import fsspec.gui
|
|
16
|
-
|
|
17
|
-
gui = fsspec.gui.FileSelector(f"file://{tmpdir}", kwargs="{'auto_mkdir': True}")
|
|
18
|
-
|
|
19
|
-
assert gui.fs.auto_mkdir
|
|
20
|
-
|
|
21
|
-
gui = fsspec.gui.FileSelector(f"file://{tmpdir}", kwargs={"auto_mkdir": True})
|
|
22
|
-
|
|
23
|
-
assert gui.fs.auto_mkdir
|
fsspec/tests/test_mapping.py
DELETED
|
@@ -1,228 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import pickle
|
|
3
|
-
import platform
|
|
4
|
-
import sys
|
|
5
|
-
import uuid
|
|
6
|
-
|
|
7
|
-
import pytest
|
|
8
|
-
|
|
9
|
-
import fsspec
|
|
10
|
-
from fsspec.implementations.local import LocalFileSystem
|
|
11
|
-
from fsspec.implementations.memory import MemoryFileSystem
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def test_mapping_prefix(tmpdir):
|
|
15
|
-
tmpdir = str(tmpdir)
|
|
16
|
-
os.makedirs(os.path.join(tmpdir, "afolder"))
|
|
17
|
-
open(os.path.join(tmpdir, "afile"), "w").write("test")
|
|
18
|
-
open(os.path.join(tmpdir, "afolder", "anotherfile"), "w").write("test2")
|
|
19
|
-
|
|
20
|
-
m = fsspec.get_mapper(f"file://{tmpdir}")
|
|
21
|
-
assert "afile" in m
|
|
22
|
-
assert m["afolder/anotherfile"] == b"test2"
|
|
23
|
-
|
|
24
|
-
fs = fsspec.filesystem("file")
|
|
25
|
-
m2 = fs.get_mapper(tmpdir)
|
|
26
|
-
m3 = fs.get_mapper(f"file://{tmpdir}")
|
|
27
|
-
|
|
28
|
-
assert m == m2 == m3
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def test_getitems_errors(tmpdir):
|
|
32
|
-
tmpdir = str(tmpdir)
|
|
33
|
-
os.makedirs(os.path.join(tmpdir, "afolder"))
|
|
34
|
-
open(os.path.join(tmpdir, "afile"), "w").write("test")
|
|
35
|
-
open(os.path.join(tmpdir, "afolder", "anotherfile"), "w").write("test2")
|
|
36
|
-
m = fsspec.get_mapper(f"file://{tmpdir}")
|
|
37
|
-
assert m.getitems(["afile", "bfile"], on_error="omit") == {"afile": b"test"}
|
|
38
|
-
with pytest.raises(KeyError):
|
|
39
|
-
m.getitems(["afile", "bfile"])
|
|
40
|
-
out = m.getitems(["afile", "bfile"], on_error="return")
|
|
41
|
-
assert isinstance(out["bfile"], KeyError)
|
|
42
|
-
m = fsspec.get_mapper(f"file://{tmpdir}", missing_exceptions=())
|
|
43
|
-
assert m.getitems(["afile", "bfile"], on_error="omit") == {"afile": b"test"}
|
|
44
|
-
with pytest.raises(FileNotFoundError):
|
|
45
|
-
m.getitems(["afile", "bfile"])
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def test_ops():
|
|
49
|
-
MemoryFileSystem.store.clear()
|
|
50
|
-
m = fsspec.get_mapper("memory://")
|
|
51
|
-
assert not m
|
|
52
|
-
assert list(m) == []
|
|
53
|
-
|
|
54
|
-
with pytest.raises(KeyError):
|
|
55
|
-
m["hi"]
|
|
56
|
-
|
|
57
|
-
assert m.pop("key", 0) == 0
|
|
58
|
-
|
|
59
|
-
m["key0"] = b"data"
|
|
60
|
-
assert list(m) == ["key0"]
|
|
61
|
-
assert m["key0"] == b"data"
|
|
62
|
-
|
|
63
|
-
m.clear()
|
|
64
|
-
|
|
65
|
-
assert list(m) == []
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def test_pickle():
|
|
69
|
-
m = fsspec.get_mapper("memory://")
|
|
70
|
-
assert isinstance(m.fs, MemoryFileSystem)
|
|
71
|
-
m["key"] = b"data"
|
|
72
|
-
m2 = pickle.loads(pickle.dumps(m))
|
|
73
|
-
assert list(m) == list(m2)
|
|
74
|
-
assert m.missing_exceptions == m2.missing_exceptions
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
def test_keys_view():
|
|
78
|
-
# https://github.com/fsspec/filesystem_spec/issues/186
|
|
79
|
-
m = fsspec.get_mapper("memory://")
|
|
80
|
-
m["key"] = b"data"
|
|
81
|
-
|
|
82
|
-
keys = m.keys()
|
|
83
|
-
assert len(keys) == 1
|
|
84
|
-
# check that we don't consume the keys
|
|
85
|
-
assert len(keys) == 1
|
|
86
|
-
m.clear()
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def test_multi():
|
|
90
|
-
m = fsspec.get_mapper("memory:///")
|
|
91
|
-
data = {"a": b"data1", "b": b"data2"}
|
|
92
|
-
m.setitems(data)
|
|
93
|
-
|
|
94
|
-
assert m.getitems(list(data)) == data
|
|
95
|
-
m.delitems(list(data))
|
|
96
|
-
assert not list(m)
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
def test_setitem_types():
|
|
100
|
-
import array
|
|
101
|
-
|
|
102
|
-
m = fsspec.get_mapper("memory://")
|
|
103
|
-
m["a"] = array.array("i", [1])
|
|
104
|
-
if sys.byteorder == "little":
|
|
105
|
-
assert m["a"] == b"\x01\x00\x00\x00"
|
|
106
|
-
else:
|
|
107
|
-
assert m["a"] == b"\x00\x00\x00\x01"
|
|
108
|
-
m["b"] = bytearray(b"123")
|
|
109
|
-
assert m["b"] == b"123"
|
|
110
|
-
m.setitems({"c": array.array("i", [1]), "d": bytearray(b"123")})
|
|
111
|
-
if sys.byteorder == "little":
|
|
112
|
-
assert m["c"] == b"\x01\x00\x00\x00"
|
|
113
|
-
else:
|
|
114
|
-
assert m["c"] == b"\x00\x00\x00\x01"
|
|
115
|
-
assert m["d"] == b"123"
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def test_setitem_numpy():
|
|
119
|
-
m = fsspec.get_mapper("memory://")
|
|
120
|
-
np = pytest.importorskip("numpy")
|
|
121
|
-
m["c"] = np.array(1, dtype="<i4") # scalar
|
|
122
|
-
assert m["c"] == b"\x01\x00\x00\x00"
|
|
123
|
-
m["c"] = np.array([1, 2], dtype="<i4") # array
|
|
124
|
-
assert m["c"] == b"\x01\x00\x00\x00\x02\x00\x00\x00"
|
|
125
|
-
m["c"] = np.array(
|
|
126
|
-
np.datetime64("2000-01-01T23:59:59.999999999"), dtype="<M8[ns]"
|
|
127
|
-
) # datetime64 scalar
|
|
128
|
-
assert m["c"] == b"\xff\xff\x91\xe3c\x9b#\r"
|
|
129
|
-
m["c"] = np.array(
|
|
130
|
-
[
|
|
131
|
-
np.datetime64("1900-01-01T23:59:59.999999999"),
|
|
132
|
-
np.datetime64("2000-01-01T23:59:59.999999999"),
|
|
133
|
-
],
|
|
134
|
-
dtype="<M8[ns]",
|
|
135
|
-
) # datetime64 array
|
|
136
|
-
assert m["c"] == b"\xff\xff}p\xf8fX\xe1\xff\xff\x91\xe3c\x9b#\r"
|
|
137
|
-
m["c"] = np.array(
|
|
138
|
-
np.timedelta64(3155673612345678901, "ns"), dtype="<m8[ns]"
|
|
139
|
-
) # timedelta64 scalar
|
|
140
|
-
assert m["c"] == b"5\x1c\xf0Rn4\xcb+"
|
|
141
|
-
m["c"] = np.array(
|
|
142
|
-
[
|
|
143
|
-
np.timedelta64(450810516049382700, "ns"),
|
|
144
|
-
np.timedelta64(3155673612345678901, "ns"),
|
|
145
|
-
],
|
|
146
|
-
dtype="<m8[ns]",
|
|
147
|
-
) # timedelta64 scalar
|
|
148
|
-
assert m["c"] == b',M"\x9e\xc6\x99A\x065\x1c\xf0Rn4\xcb+'
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
def test_empty_url():
|
|
152
|
-
m = fsspec.get_mapper()
|
|
153
|
-
assert isinstance(m.fs, LocalFileSystem)
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
def test_fsmap_access_with_root_prefix(tmp_path):
|
|
157
|
-
# "/a" and "a" are the same for LocalFileSystem
|
|
158
|
-
tmp_path.joinpath("a").write_bytes(b"data")
|
|
159
|
-
m = fsspec.get_mapper(f"file://{tmp_path}")
|
|
160
|
-
assert m["/a"] == m["a"] == b"data"
|
|
161
|
-
|
|
162
|
-
# "/a" and "a" differ for MemoryFileSystem
|
|
163
|
-
m = fsspec.get_mapper(f"memory://{uuid.uuid4()}")
|
|
164
|
-
m["/a"] = b"data"
|
|
165
|
-
|
|
166
|
-
assert m["/a"] == b"data"
|
|
167
|
-
with pytest.raises(KeyError):
|
|
168
|
-
_ = m["a"]
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
@pytest.mark.parametrize(
|
|
172
|
-
"key",
|
|
173
|
-
[
|
|
174
|
-
pytest.param(b"k", id="bytes"),
|
|
175
|
-
pytest.param(1234, id="int"),
|
|
176
|
-
pytest.param((1,), id="tuple"),
|
|
177
|
-
pytest.param([""], id="list"),
|
|
178
|
-
],
|
|
179
|
-
)
|
|
180
|
-
def test_fsmap_non_str_keys(key):
|
|
181
|
-
m = fsspec.get_mapper()
|
|
182
|
-
|
|
183
|
-
# Once the deprecation period passes
|
|
184
|
-
# FSMap.__getitem__ should raise TypeError for non-str keys
|
|
185
|
-
# with pytest.raises(TypeError):
|
|
186
|
-
# _ = m[key]
|
|
187
|
-
|
|
188
|
-
with pytest.warns(DeprecationWarning):
|
|
189
|
-
with pytest.raises(KeyError):
|
|
190
|
-
_ = m[key]
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
def test_fsmap_error_on_protocol_keys():
|
|
194
|
-
root = uuid.uuid4()
|
|
195
|
-
m = fsspec.get_mapper(f"memory://{root}", create=True)
|
|
196
|
-
m["a"] = b"data"
|
|
197
|
-
|
|
198
|
-
assert m["a"] == b"data"
|
|
199
|
-
with pytest.raises(KeyError):
|
|
200
|
-
_ = m[f"memory://{root}/a"]
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
def test_fsmap_access_with_suffix(tmp_path):
|
|
204
|
-
tmp_path.joinpath("b").mkdir()
|
|
205
|
-
tmp_path.joinpath("b", "a").write_bytes(b"data")
|
|
206
|
-
if platform.system() == "Windows":
|
|
207
|
-
# on Windows opening a directory will raise PermissionError
|
|
208
|
-
# see: https://bugs.python.org/issue43095
|
|
209
|
-
missing_exceptions = (
|
|
210
|
-
FileNotFoundError,
|
|
211
|
-
IsADirectoryError,
|
|
212
|
-
NotADirectoryError,
|
|
213
|
-
PermissionError,
|
|
214
|
-
)
|
|
215
|
-
else:
|
|
216
|
-
missing_exceptions = None
|
|
217
|
-
m = fsspec.get_mapper(f"file://{tmp_path}", missing_exceptions=missing_exceptions)
|
|
218
|
-
with pytest.raises(KeyError):
|
|
219
|
-
_ = m["b/"]
|
|
220
|
-
assert m["b/a/"] == b"data"
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
def test_fsmap_dirfs():
|
|
224
|
-
m = fsspec.get_mapper("memory://")
|
|
225
|
-
|
|
226
|
-
fs = m.dirfs
|
|
227
|
-
assert isinstance(fs, fsspec.implementations.dirfs.DirFileSystem)
|
|
228
|
-
assert fs.path == m.root
|
fsspec/tests/test_parquet.py
DELETED
|
@@ -1,140 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
|
|
3
|
-
import pytest
|
|
4
|
-
|
|
5
|
-
try:
|
|
6
|
-
import fastparquet
|
|
7
|
-
except ImportError:
|
|
8
|
-
fastparquet = None
|
|
9
|
-
try:
|
|
10
|
-
import pyarrow.parquet as pq
|
|
11
|
-
except ImportError:
|
|
12
|
-
pq = None
|
|
13
|
-
|
|
14
|
-
from fsspec.core import url_to_fs
|
|
15
|
-
from fsspec.parquet import _get_parquet_byte_ranges, open_parquet_file
|
|
16
|
-
|
|
17
|
-
# Define `engine` fixture
|
|
18
|
-
FASTPARQUET_MARK = pytest.mark.skipif(not fastparquet, reason="fastparquet not found")
|
|
19
|
-
PYARROW_MARK = pytest.mark.skipif(not pq, reason="pyarrow not found")
|
|
20
|
-
ANY_ENGINE_MARK = pytest.mark.skipif(
|
|
21
|
-
not (fastparquet or pq),
|
|
22
|
-
reason="No parquet engine (fastparquet or pyarrow) found",
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
@pytest.fixture(
|
|
27
|
-
params=[
|
|
28
|
-
pytest.param("fastparquet", marks=FASTPARQUET_MARK),
|
|
29
|
-
pytest.param("pyarrow", marks=PYARROW_MARK),
|
|
30
|
-
pytest.param("auto", marks=ANY_ENGINE_MARK),
|
|
31
|
-
]
|
|
32
|
-
)
|
|
33
|
-
def engine(request):
|
|
34
|
-
return request.param
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
@pytest.mark.parametrize("columns", [None, ["x"], ["x", "y"], ["z"]])
|
|
38
|
-
@pytest.mark.parametrize("max_gap", [0, 64])
|
|
39
|
-
@pytest.mark.parametrize("max_block", [64, 256_000_000])
|
|
40
|
-
@pytest.mark.parametrize("footer_sample_size", [8, 1_000])
|
|
41
|
-
@pytest.mark.parametrize("range_index", [True, False])
|
|
42
|
-
def test_open_parquet_file(
|
|
43
|
-
tmpdir, engine, columns, max_gap, max_block, footer_sample_size, range_index
|
|
44
|
-
):
|
|
45
|
-
# Pandas required for this test
|
|
46
|
-
pd = pytest.importorskip("pandas")
|
|
47
|
-
|
|
48
|
-
# Write out a simple DataFrame
|
|
49
|
-
path = os.path.join(str(tmpdir), "test.parquet")
|
|
50
|
-
nrows = 40
|
|
51
|
-
df = pd.DataFrame(
|
|
52
|
-
{
|
|
53
|
-
"x": [i * 7 % 5 for i in range(nrows)],
|
|
54
|
-
"y": [[0, i] for i in range(nrows)], # list
|
|
55
|
-
"z": [{"a": i, "b": "cat"} for i in range(nrows)], # struct
|
|
56
|
-
},
|
|
57
|
-
index=pd.Index([10 * i for i in range(nrows)], name="myindex"),
|
|
58
|
-
)
|
|
59
|
-
if range_index:
|
|
60
|
-
df = df.reset_index(drop=True)
|
|
61
|
-
df.index.name = "myindex"
|
|
62
|
-
df.to_parquet(path)
|
|
63
|
-
|
|
64
|
-
# "Traditional read" (without `open_parquet_file`)
|
|
65
|
-
expect = pd.read_parquet(path, columns=columns)
|
|
66
|
-
|
|
67
|
-
# Use `_get_parquet_byte_ranges` to re-write a
|
|
68
|
-
# place-holder file with all bytes NOT required
|
|
69
|
-
# to read `columns` set to b"0". The purpose of
|
|
70
|
-
# this step is to make sure the read will fail
|
|
71
|
-
# if the correct bytes have not been accurately
|
|
72
|
-
# selected by `_get_parquet_byte_ranges`. If this
|
|
73
|
-
# test were reading from remote storage, we would
|
|
74
|
-
# not need this logic to capture errors.
|
|
75
|
-
fs = url_to_fs(path)[0]
|
|
76
|
-
data = _get_parquet_byte_ranges(
|
|
77
|
-
[path],
|
|
78
|
-
fs,
|
|
79
|
-
columns=columns,
|
|
80
|
-
engine=engine,
|
|
81
|
-
max_gap=max_gap,
|
|
82
|
-
max_block=max_block,
|
|
83
|
-
footer_sample_size=footer_sample_size,
|
|
84
|
-
)[path]
|
|
85
|
-
file_size = fs.size(path)
|
|
86
|
-
with open(path, "wb") as f:
|
|
87
|
-
f.write(b"0" * file_size)
|
|
88
|
-
|
|
89
|
-
if footer_sample_size == 8:
|
|
90
|
-
# We know 8 bytes is too small to include
|
|
91
|
-
# the footer metadata, so there should NOT
|
|
92
|
-
# be a key for the last 8 bytes of the file
|
|
93
|
-
bad_key = (file_size - 8, file_size)
|
|
94
|
-
assert bad_key not in data.keys()
|
|
95
|
-
|
|
96
|
-
for (start, stop), byte_data in data.items():
|
|
97
|
-
f.seek(start)
|
|
98
|
-
f.write(byte_data)
|
|
99
|
-
|
|
100
|
-
# Read back the modified file with `open_parquet_file`
|
|
101
|
-
with open_parquet_file(
|
|
102
|
-
path,
|
|
103
|
-
columns=columns,
|
|
104
|
-
engine=engine,
|
|
105
|
-
max_gap=max_gap,
|
|
106
|
-
max_block=max_block,
|
|
107
|
-
footer_sample_size=footer_sample_size,
|
|
108
|
-
) as f:
|
|
109
|
-
result = pd.read_parquet(f, columns=columns)
|
|
110
|
-
|
|
111
|
-
# Check that `result` matches `expect`
|
|
112
|
-
pd.testing.assert_frame_equal(expect, result)
|
|
113
|
-
|
|
114
|
-
# Try passing metadata
|
|
115
|
-
if engine == "fastparquet":
|
|
116
|
-
# Should work fine for "fastparquet"
|
|
117
|
-
pf = fastparquet.ParquetFile(path)
|
|
118
|
-
with open_parquet_file(
|
|
119
|
-
path,
|
|
120
|
-
metadata=pf,
|
|
121
|
-
columns=columns,
|
|
122
|
-
engine=engine,
|
|
123
|
-
max_gap=max_gap,
|
|
124
|
-
max_block=max_block,
|
|
125
|
-
footer_sample_size=footer_sample_size,
|
|
126
|
-
) as f:
|
|
127
|
-
result = pd.read_parquet(f, columns=columns)
|
|
128
|
-
pd.testing.assert_frame_equal(expect, result)
|
|
129
|
-
elif engine == "pyarrow":
|
|
130
|
-
# Should raise ValueError for "pyarrow"
|
|
131
|
-
with pytest.raises(ValueError):
|
|
132
|
-
open_parquet_file(
|
|
133
|
-
path,
|
|
134
|
-
metadata=["Not-None"],
|
|
135
|
-
columns=columns,
|
|
136
|
-
engine=engine,
|
|
137
|
-
max_gap=max_gap,
|
|
138
|
-
max_block=max_block,
|
|
139
|
-
footer_sample_size=footer_sample_size,
|
|
140
|
-
)
|