fsspec 2024.3.0__py3-none-any.whl → 2024.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. fsspec/__init__.py +2 -3
  2. fsspec/_version.py +14 -19
  3. fsspec/caching.py +83 -14
  4. fsspec/compression.py +1 -0
  5. fsspec/core.py +31 -6
  6. fsspec/exceptions.py +1 -0
  7. fsspec/generic.py +1 -1
  8. fsspec/gui.py +1 -1
  9. fsspec/implementations/arrow.py +0 -2
  10. fsspec/implementations/cache_mapper.py +1 -2
  11. fsspec/implementations/cache_metadata.py +7 -7
  12. fsspec/implementations/dirfs.py +2 -2
  13. fsspec/implementations/http.py +9 -9
  14. fsspec/implementations/local.py +97 -48
  15. fsspec/implementations/memory.py +9 -0
  16. fsspec/implementations/smb.py +3 -1
  17. fsspec/implementations/tests/__init__.py +0 -0
  18. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +112 -0
  19. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +582 -0
  20. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +873 -0
  21. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +458 -0
  22. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +1355 -0
  23. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +795 -0
  24. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +613 -0
  25. fsspec/implementations/tests/conftest.py +39 -0
  26. fsspec/implementations/tests/local/__init__.py +0 -0
  27. fsspec/implementations/tests/local/local_fixtures.py +18 -0
  28. fsspec/implementations/tests/local/local_test.py +14 -0
  29. fsspec/implementations/tests/memory/__init__.py +0 -0
  30. fsspec/implementations/tests/memory/memory_fixtures.py +27 -0
  31. fsspec/implementations/tests/memory/memory_test.py +14 -0
  32. fsspec/implementations/tests/out.zip +0 -0
  33. fsspec/implementations/tests/test_archive.py +382 -0
  34. fsspec/implementations/tests/test_arrow.py +259 -0
  35. fsspec/implementations/tests/test_cached.py +1306 -0
  36. fsspec/implementations/tests/test_common.py +35 -0
  37. fsspec/implementations/tests/test_dask.py +29 -0
  38. fsspec/implementations/tests/test_data.py +20 -0
  39. fsspec/implementations/tests/test_dbfs.py +268 -0
  40. fsspec/implementations/tests/test_dirfs.py +588 -0
  41. fsspec/implementations/tests/test_ftp.py +178 -0
  42. fsspec/implementations/tests/test_git.py +76 -0
  43. fsspec/implementations/tests/test_http.py +577 -0
  44. fsspec/implementations/tests/test_jupyter.py +57 -0
  45. fsspec/implementations/tests/test_libarchive.py +33 -0
  46. fsspec/implementations/tests/test_local.py +1285 -0
  47. fsspec/implementations/tests/test_memory.py +382 -0
  48. fsspec/implementations/tests/test_reference.py +720 -0
  49. fsspec/implementations/tests/test_sftp.py +233 -0
  50. fsspec/implementations/tests/test_smb.py +139 -0
  51. fsspec/implementations/tests/test_tar.py +243 -0
  52. fsspec/implementations/tests/test_webhdfs.py +197 -0
  53. fsspec/implementations/tests/test_zip.py +134 -0
  54. fsspec/implementations/webhdfs.py +1 -3
  55. fsspec/mapping.py +2 -2
  56. fsspec/parquet.py +0 -8
  57. fsspec/registry.py +4 -0
  58. fsspec/spec.py +21 -4
  59. fsspec/tests/__init__.py +0 -0
  60. fsspec/tests/abstract/mv.py +57 -0
  61. fsspec/tests/conftest.py +188 -0
  62. fsspec/tests/data/listing.html +1 -0
  63. fsspec/tests/test_api.py +498 -0
  64. fsspec/tests/test_async.py +230 -0
  65. fsspec/tests/test_caches.py +255 -0
  66. fsspec/tests/test_callbacks.py +89 -0
  67. fsspec/tests/test_compression.py +164 -0
  68. fsspec/tests/test_config.py +129 -0
  69. fsspec/tests/test_core.py +466 -0
  70. fsspec/tests/test_downstream.py +40 -0
  71. fsspec/tests/test_file.py +200 -0
  72. fsspec/tests/test_fuse.py +147 -0
  73. fsspec/tests/test_generic.py +90 -0
  74. fsspec/tests/test_gui.py +23 -0
  75. fsspec/tests/test_mapping.py +228 -0
  76. fsspec/tests/test_parquet.py +140 -0
  77. fsspec/tests/test_registry.py +134 -0
  78. fsspec/tests/test_spec.py +1167 -0
  79. fsspec/tests/test_utils.py +478 -0
  80. fsspec/utils.py +0 -2
  81. fsspec-2024.5.0.dist-info/METADATA +273 -0
  82. fsspec-2024.5.0.dist-info/RECORD +111 -0
  83. {fsspec-2024.3.0.dist-info → fsspec-2024.5.0.dist-info}/WHEEL +1 -2
  84. fsspec-2024.3.0.dist-info/METADATA +0 -167
  85. fsspec-2024.3.0.dist-info/RECORD +0 -54
  86. fsspec-2024.3.0.dist-info/top_level.txt +0 -1
  87. {fsspec-2024.3.0.dist-info → fsspec-2024.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,35 @@
1
+ import datetime
2
+ import time
3
+
4
+ import pytest
5
+
6
+ from fsspec import AbstractFileSystem
7
+ from fsspec.implementations.tests.conftest import READ_ONLY_FILESYSTEMS
8
+
9
+
10
@pytest.mark.parametrize("fs", ["local"], indirect=["fs"])
def test_created(fs: AbstractFileSystem, temp_file):
    """Touching a file and asking for its creation time yields a datetime."""
    try:
        fs.touch(temp_file)
        timestamp = fs.created(path=temp_file)
        assert isinstance(timestamp, datetime.datetime)
    finally:
        # Only clean up on filesystems that support deletion.
        writable = not isinstance(fs, tuple(READ_ONLY_FILESYSTEMS))
        if writable:
            fs.rm(temp_file)
19
+
20
+
21
@pytest.mark.parametrize("fs", ["local", "memory", "arrow"], indirect=["fs"])
def test_modified(fs: AbstractFileSystem, temp_file):
    """A second touch moves the modification time past an earlier baseline.

    The baseline is wall-clock UTC rather than ``fs.created`` because pyarrow
    filesystems only expose a modification time.
    """
    try:
        fs.touch(temp_file)
        # Baseline taken between the two touches; pyarrow only has "modified".
        baseline = datetime.datetime.now(tz=datetime.timezone.utc)
        time.sleep(0.05)
        fs.touch(temp_file)
        modified = fs.modified(path=temp_file)
        assert isinstance(modified, datetime.datetime)
        assert modified > baseline
    finally:
        # Guard cleanup the same way test_created does, so read-only
        # filesystems do not raise during teardown.
        if not isinstance(fs, tuple(READ_ONLY_FILESYSTEMS)):
            fs.rm(temp_file)
@@ -0,0 +1,29 @@
1
+ import pytest
2
+
3
+ import fsspec
4
+
5
+ pytest.importorskip("distributed")
6
+
7
+
8
@pytest.fixture()
def cli(tmpdir):
    """Yield a one-worker dask client whose worker holds an in-memory file."""
    import dask.distributed

    client = dask.distributed.Client(n_workers=1)

    def seed_memory_fs():
        # Runs on the worker: write the fixture file into its memory FS.
        mem = fsspec.filesystem("memory")
        with mem.open("afile", "wb") as fh:
            fh.write(b"data")

    client.run(seed_memory_fs)
    try:
        yield client
    finally:
        client.shutdown()
24
+
25
+
26
def test_basic(cli):
    """The dask filesystem proxies listing and reads to the worker's memory FS."""
    fs = fsspec.filesystem("dask", target_protocol="memory")
    listing = fs.ls("", detail=False)
    assert listing == ["/afile"]
    contents = fs.cat("/afile")
    assert contents == b"data"
@@ -0,0 +1,20 @@
1
+ import fsspec
2
+
3
+
4
def test_1():
    """Base64- and percent-encoded data URLs both decode to the same bytes."""
    urls = (
        "data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==",
        "data:,Hello%2C%20World%21",
    )
    for url in urls:
        with fsspec.open(url) as fh:
            assert fh.read() == b"Hello, World!"
10
+
11
+
12
def test_info():
    """info() on a data URL reports name, decoded size, type and mimetype."""
    fs = fsspec.filesystem("data")
    expected = {
        "name": "%3Ch1%3EHello%2C%20World%21%3C%2Fh1%3E",
        "size": 22,
        "type": "file",
        "mimetype": "text/html",
    }
    info = fs.info("data:text/html,%3Ch1%3EHello%2C%20World%21%3C%2Fh1%3E")
    assert info == expected
@@ -0,0 +1,268 @@
1
+ """
2
+ Test-Cases for the DataBricks Filesystem.
3
+ This test case is somewhat special, as there is no "mock" databricks
4
+ API available. We use the [vcr](https://github.com/kevin1024/vcrpy)
5
+ package to record the requests and responses to the real databricks API and
6
+ replay them on tests.
7
+
8
+ This however means, that when you change the tests (or when the API
9
+ itself changes, which is very unlikely to occur as it is versioned),
10
+ you need to re-record the answers. This can be done as follows:
11
+
12
+ 1. Delete all cassette files in the "./cassettes/test_dbfs" folder
13
+ 2. Spin up a databricks cluster. For example,
14
+ you can use an Azure Databricks instance for this.
15
+ 3. Take note of the instance details (the instance URL. For example for an Azure
16
+ databricks cluster, this has the form
17
+ adb-<some-number>.<two digits>.azuredatabricks.net)
18
+ and your personal token (Find out more here:
19
+ https://docs.databricks.com/dev-tools/api/latest/authentication.html)
20
+ 4. Set the two environment variables `DBFS_INSTANCE` and `DBFS_TOKEN`
21
+ 5. Now execute the tests as normal. The results of the API calls will be recorded.
22
+ 6. Unset the environment variables and replay the tests.
23
+ """
24
+
25
+ import os
26
+ import sys
27
+ from urllib.parse import urlparse
28
+
29
+ import numpy
30
+ import pytest
31
+
32
+ import fsspec
33
+
34
+ if sys.version_info >= (3, 10):
35
+ pytest.skip("These tests need to be re-recorded.", allow_module_level=True)
36
+
37
+ DUMMY_INSTANCE = "my_instance.com"
38
+ INSTANCE = os.getenv("DBFS_INSTANCE", DUMMY_INSTANCE)
39
+ TOKEN = os.getenv("DBFS_TOKEN", "")
40
+
41
+
42
@pytest.fixture(scope="module")
def vcr_config():
    """
    Build the VCR configuration used to record/replay DBFS API calls.

    Sensitive details (instance URL, token, org id) and the volatile date
    header are scrubbed before anything is written to a cassette.  With
    DBFS_TOKEN set we record ("once"); without it we strictly replay
    ("none") so a wrong URL can never be recorded by accident.
    """

    def scrub_response(response):
        # Drop org id and date headers; order matters — if the org id is
        # absent the date is left alone, mirroring the original behaviour.
        try:
            del response["headers"]["x-databricks-org-id"]
            del response["headers"]["date"]
        except KeyError:
            pass
        return response

    def scrub_request(request):
        # Swap the real instance host for the dummy one.
        parts = urlparse(request.uri)
        request.uri = parts._replace(netloc=DUMMY_INSTANCE).geturl()
        return request

    if not TOKEN:
        return {"record_mode": "none"}
    return {
        "record_mode": "once",
        "filter_headers": [("authorization", "DUMMY")],
        "before_record_response": scrub_response,
        "before_record_request": scrub_request,
    }
81
+
82
+
83
@pytest.fixture
def dbfsFS():
    """A DBFS filesystem pointed at the configured (or dummy) instance."""
    return fsspec.filesystem("dbfs", instance=INSTANCE, token=TOKEN)
88
+
89
+
90
@pytest.fixture
def make_mock_diabetes_ds():
    """Build a 25-row random pyarrow table shaped like the diabetes dataset."""
    pa = pytest.importorskip("pyarrow")

    rng = numpy.random
    rows = 25
    # NOTE(review): randint's upper bound is exclusive, so "Outcome" is
    # always 0 here — presumably high=2 was intended; the tests below only
    # check shape/schema, so the values do not matter.
    columns = [
        ("Pregnancies", pa.array(rng.randint(low=0, high=17, size=rows))),
        ("Glucose", pa.array(rng.randint(low=0, high=199, size=rows))),
        ("BloodPressure", pa.array(rng.randint(low=0, high=122, size=rows))),
        ("SkinThickness", pa.array(rng.randint(low=0, high=99, size=rows))),
        ("Insulin", pa.array(rng.randint(low=0, high=846, size=rows))),
        ("BMI", pa.array(rng.uniform(0.0, 67.1, size=rows))),
        (
            "DiabetesPedigreeFunction",
            pa.array(rng.uniform(0.08, 2.42, size=rows)),
        ),
        ("Age", pa.array(rng.randint(low=21, high=81, size=rows))),
        ("Outcome", pa.array(rng.randint(low=0, high=1, size=rows))),
    ]
    return pa.Table.from_arrays(
        arrays=[array for _, array in columns],
        names=[name for name, _ in columns],
    )
129
+
130
+
131
@pytest.mark.vcr()
def test_dbfs_file_listing(dbfsFS):
    """Root listing contains /FileStore, both as a name and as a detail dict."""
    names = dbfsFS.ls("/", detail=False)
    assert "/FileStore" in names
    details = dbfsFS.ls("/", detail=True)
    assert {"name": "/FileStore", "size": 0, "type": "directory"} in details
137
+
138
+
139
@pytest.mark.vcr()
def test_dbfs_mkdir(dbfsFS):
    """mkdir creates missing parents; rm refuses non-recursive tree removal."""
    parent = "/FileStore/my"
    nested = "/FileStore/my/dir"

    # Start from a clean slate.
    dbfsFS.rm(parent, recursive=True)
    assert parent not in dbfsFS.ls("/FileStore/", detail=False)

    dbfsFS.mkdir(nested, create_parents=True)
    assert parent in dbfsFS.ls("/FileStore/", detail=False)
    assert nested in dbfsFS.ls("/FileStore/my/", detail=False)

    # Re-creating an existing directory without exist_ok must fail.
    with pytest.raises(FileExistsError):
        dbfsFS.mkdir(nested, create_parents=True, exist_ok=False)

    # A non-empty directory cannot be removed non-recursively.
    with pytest.raises(OSError):
        dbfsFS.rm(parent, recursive=False)
    assert parent in dbfsFS.ls("/FileStore/", detail=False)

    dbfsFS.rm(parent, recursive=True)
    assert parent not in dbfsFS.ls("/FileStore/", detail=False)
159
+
160
+
161
@pytest.mark.vcr()
def test_dbfs_write_and_read(dbfsFS):
    """A file written through the DBFS API reads back byte-identical."""
    path = "/FileStore/file.csv"
    dbfsFS.rm(path)
    assert path not in dbfsFS.ls("/FileStore/", detail=False)

    payload = b"This is a test\n" * 100000 + b"For this is the end\n"
    with dbfsFS.open(path, "wb") as fh:
        fh.write(payload)
    assert path in dbfsFS.ls("/FileStore", detail=False)

    with dbfsFS.open(path, "rb") as fh:
        roundtrip = fh.read()
        assert roundtrip == payload

    dbfsFS.rm(path)
    assert path not in dbfsFS.ls("/FileStore/", detail=False)
178
+
179
+
180
@pytest.mark.vcr()
def test_dbfs_read_range(dbfsFS):
    """cat_file with start/end returns exactly that slice of the file."""
    path = "/FileStore/file.txt"
    dbfsFS.rm(path)
    assert path not in dbfsFS.ls("/FileStore/", detail=False)

    payload = b"This is a test\n"
    with dbfsFS.open(path, "wb") as fh:
        fh.write(payload)
    assert path in dbfsFS.ls("/FileStore", detail=False)

    assert dbfsFS.cat_file(path, start=8, end=14) == payload[8:14]

    dbfsFS.rm(path)
    assert path not in dbfsFS.ls("/FileStore/", detail=False)
191
+
192
+
193
@pytest.mark.vcr()
def test_dbfs_read_range_chunked(dbfsFS):
    """An open-ended cat_file range spans multiple download chunks correctly."""
    path = "/FileStore/large_file.txt"
    dbfsFS.rm(path)
    assert path not in dbfsFS.ls("/FileStore/", detail=False)

    # Large enough to force chunked reads on the way back.
    payload = b"This is a test\n" * (1 * 2**18) + b"For this is the end\n"
    with dbfsFS.open(path, "wb") as fh:
        fh.write(payload)
    assert path in dbfsFS.ls("/FileStore", detail=False)

    assert dbfsFS.cat_file(path, start=8) == payload[8:]

    dbfsFS.rm(path)
    assert path not in dbfsFS.ls("/FileStore/", detail=False)
204
+
205
+
206
@pytest.mark.vcr()
def test_dbfs_write_pyarrow_non_partitioned(dbfsFS, make_mock_diabetes_ds):
    """write_to_dataset lands exactly one .parquet file under the root path.

    The listing is fetched once and reused instead of issuing three
    identical remote ls calls as before.
    """
    pytest.importorskip("pyarrow.dataset")
    pq = pytest.importorskip("pyarrow.parquet")

    dbfsFS.rm("/FileStore/pyarrow", recursive=True)
    assert "/FileStore/pyarrow" not in dbfsFS.ls("/FileStore/", detail=False)

    pq.write_to_dataset(
        make_mock_diabetes_ds,
        filesystem=dbfsFS,
        compression="none",
        existing_data_behavior="error",
        root_path="/FileStore/pyarrow/diabetes",
        use_threads=False,
    )

    # One listing call; the original repeated the same ls three times.
    entries = dbfsFS.ls("/FileStore/pyarrow/diabetes", detail=False)
    assert len(entries) == 1
    assert "/FileStore/pyarrow/diabetes" in entries[0]
    assert ".parquet" in entries[0]

    dbfsFS.rm("/FileStore/pyarrow", recursive=True)
    assert "/FileStore/pyarrow" not in dbfsFS.ls("/FileStore/", detail=False)
232
+
233
+
234
@pytest.mark.vcr()
def test_dbfs_read_pyarrow_non_partitioned(dbfsFS, make_mock_diabetes_ds):
    """A dataset written through DBFS reads back with the same shape/schema.

    The listing is fetched once and reused instead of issuing three
    identical remote ls calls as before.
    """
    ds = pytest.importorskip("pyarrow.dataset")
    pq = pytest.importorskip("pyarrow.parquet")

    dbfsFS.rm("/FileStore/pyarrow", recursive=True)
    assert "/FileStore/pyarrow" not in dbfsFS.ls("/FileStore/", detail=False)

    pq.write_to_dataset(
        make_mock_diabetes_ds,
        filesystem=dbfsFS,
        compression="none",
        existing_data_behavior="error",
        root_path="/FileStore/pyarrow/diabetes",
        use_threads=False,
    )

    # One listing call; the original repeated the same ls three times.
    entries = dbfsFS.ls("/FileStore/pyarrow/diabetes", detail=False)
    assert len(entries) == 1
    assert "/FileStore/pyarrow/diabetes" in entries[0]
    assert ".parquet" in entries[0]

    arr_res = ds.dataset(
        source="/FileStore/pyarrow/diabetes",
        filesystem=dbfsFS,
    ).to_table()

    assert arr_res.num_rows == make_mock_diabetes_ds.num_rows
    assert arr_res.num_columns == make_mock_diabetes_ds.num_columns
    assert set(arr_res.schema).difference(set(make_mock_diabetes_ds.schema)) == set()

    dbfsFS.rm("/FileStore/pyarrow", recursive=True)
    assert "/FileStore/pyarrow" not in dbfsFS.ls("/FileStore/", detail=False)