pyogrio 0.7.2-cp39-cp39-manylinux_2_28_aarch64.whl → 0.9.0-cp39-cp39-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)
  1. pyogrio/__init__.py +4 -0
  2. pyogrio/_compat.py +6 -1
  3. pyogrio/_err.cpython-39-aarch64-linux-gnu.so +0 -0
  4. pyogrio/_err.pyx +7 -3
  5. pyogrio/_geometry.cpython-39-aarch64-linux-gnu.so +0 -0
  6. pyogrio/_io.cpython-39-aarch64-linux-gnu.so +0 -0
  7. pyogrio/_io.pyx +904 -242
  8. pyogrio/_ogr.cpython-39-aarch64-linux-gnu.so +0 -0
  9. pyogrio/_ogr.pxd +69 -13
  10. pyogrio/_ogr.pyx +8 -24
  11. pyogrio/_version.py +3 -3
  12. pyogrio/_vsi.cpython-39-aarch64-linux-gnu.so +0 -0
  13. pyogrio/_vsi.pxd +4 -0
  14. pyogrio/_vsi.pyx +140 -0
  15. pyogrio/core.py +43 -44
  16. pyogrio/gdal_data/GDAL-targets-release.cmake +3 -3
  17. pyogrio/gdal_data/GDAL-targets.cmake +10 -6
  18. pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
  19. pyogrio/gdal_data/gdalinfo_output.schema.json +2 -0
  20. pyogrio/gdal_data/gdalvrt.xsd +163 -0
  21. pyogrio/gdal_data/ogrinfo_output.schema.json +12 -1
  22. pyogrio/gdal_data/vcpkg.spdx.json +26 -26
  23. pyogrio/gdal_data/vcpkg_abi_info.txt +27 -26
  24. pyogrio/geopandas.py +140 -34
  25. pyogrio/proj_data/ITRF2008 +2 -2
  26. pyogrio/proj_data/proj-config-version.cmake +2 -2
  27. pyogrio/proj_data/proj-config.cmake +2 -1
  28. pyogrio/proj_data/proj-targets.cmake +13 -13
  29. pyogrio/proj_data/proj.db +0 -0
  30. pyogrio/proj_data/proj4-targets.cmake +13 -13
  31. pyogrio/proj_data/vcpkg.spdx.json +20 -42
  32. pyogrio/proj_data/vcpkg_abi_info.txt +14 -15
  33. pyogrio/raw.py +438 -116
  34. pyogrio/tests/conftest.py +75 -6
  35. pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
  36. pyogrio/tests/test_arrow.py +841 -7
  37. pyogrio/tests/test_core.py +99 -7
  38. pyogrio/tests/test_geopandas_io.py +827 -121
  39. pyogrio/tests/test_path.py +23 -3
  40. pyogrio/tests/test_raw_io.py +276 -50
  41. pyogrio/util.py +39 -19
  42. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/METADATA +2 -2
  43. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/RECORD +210 -207
  44. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/WHEEL +1 -1
  45. pyogrio.libs/{libgdal-cb554135.so.33.3.7.2 → libgdal-6ff0914e.so.34.3.8.5} +0 -0
  46. pyogrio/tests/win32.py +0 -86
  47. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/LICENSE +0 -0
  48. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,12 @@
  import contextlib
  from datetime import datetime
- import os
+ from io import BytesIO
+ import locale
+
  import numpy as np
  import pytest

- from pyogrio import list_layers, read_info, __gdal_version__
+ from pyogrio import list_layers, list_drivers, read_info, __gdal_version__
  from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
  from pyogrio.geopandas import read_dataframe, write_dataframe, PANDAS_GE_20
  from pyogrio.raw import (
@@ -14,10 +16,11 @@ from pyogrio.raw import (
  from pyogrio.tests.conftest import (
  ALL_EXTS,
  DRIVERS,
- requires_arrow_api,
+ requires_pyarrow_api,
+ requires_arrow_write_api,
  requires_gdal_geos,
  )
- from pyogrio._compat import PANDAS_GE_15
+ from pyogrio._compat import PANDAS_GE_15, HAS_ARROW_WRITE_API

  try:
  import pandas as pd
@@ -45,13 +48,30 @@ pytest.importorskip("geopandas")
  scope="session",
  params=[
  False,
- pytest.param(True, marks=requires_arrow_api),
+ pytest.param(True, marks=requires_pyarrow_api),
  ],
  )
  def use_arrow(request):
  return request.param


+ @pytest.fixture(autouse=True)
+ def skip_if_no_arrow_write_api(request):
+ # automatically skip tests with use_arrow=True and that require Arrow write
+ # API (marked with `@pytest.mark.requires_arrow_write_api`) if it is not available
+ use_arrow = (
+ request.getfixturevalue("use_arrow")
+ if "use_arrow" in request.fixturenames
+ else False
+ )
+ if (
+ use_arrow
+ and not HAS_ARROW_WRITE_API
+ and request.node.get_closest_marker("requires_arrow_write_api")
+ ):
+ pytest.skip("GDAL>=3.8 required for Arrow write API")
+
+
  def spatialite_available(path):
  try:
  _ = read_dataframe(
@@ -62,6 +82,45 @@ def spatialite_available(path):
  return False


+ @pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
+ def test_read_csv_encoding(tmp_path, encoding):
+ # Write csv test file. Depending on the os this will be written in a different
+ # encoding: for linux and macos this is utf-8, for windows it is cp1252.
+ csv_path = tmp_path / "test.csv"
+ with open(csv_path, "w", encoding=encoding) as csv:
+ csv.write("näme,city\n")
+ csv.write("Wilhelm Röntgen,Zürich\n")
+
+ # Read csv. The data should be read with the same default encoding as the csv file
+ # was written in, but should have been converted to utf-8 in the dataframe returned.
+ # Hence, the asserts below, with strings in utf-8, be OK.
+ df = read_dataframe(csv_path, encoding=encoding)
+
+ assert len(df) == 1
+ assert df.columns.tolist() == ["näme", "city"]
+ assert df.city.tolist() == ["Zürich"]
+ assert df.näme.tolist() == ["Wilhelm Röntgen"]
+
+
+ @pytest.mark.skipif(
+ locale.getpreferredencoding().upper() == "UTF-8",
+ reason="test requires non-UTF-8 default platform",
+ )
+ def test_read_csv_platform_encoding(tmp_path):
+ """verify that read defaults to platform encoding; only works on Windows (CP1252)"""
+ csv_path = tmp_path / "test.csv"
+ with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
+ csv.write("näme,city\n")
+ csv.write("Wilhelm Röntgen,Zürich\n")
+
+ df = read_dataframe(csv_path)
+
+ assert len(df) == 1
+ assert df.columns.tolist() == ["näme", "city"]
+ assert df.city.tolist() == ["Zürich"]
+ assert df.näme.tolist() == ["Wilhelm Röntgen"]
+
+
  def test_read_dataframe(naturalearth_lowres_all_ext):
  df = read_dataframe(naturalearth_lowres_all_ext)

@@ -77,8 +136,8 @@ def test_read_dataframe(naturalearth_lowres_all_ext):
  ]


- def test_read_dataframe_vsi(naturalearth_lowres_vsi):
- df = read_dataframe(naturalearth_lowres_vsi[1])
+ def test_read_dataframe_vsi(naturalearth_lowres_vsi, use_arrow):
+ df = read_dataframe(naturalearth_lowres_vsi[1], use_arrow=use_arrow)
  assert len(df) == 177


@@ -154,6 +213,7 @@ def test_read_force_2d(test_fgdb_vsi, use_arrow):


  @pytest.mark.filterwarnings("ignore: Measured")
+ @pytest.mark.filterwarnings("ignore: More than one layer found in")
  def test_read_layer(test_fgdb_vsi, use_arrow):
  layers = list_layers(test_fgdb_vsi)
  kwargs = {"use_arrow": use_arrow, "read_geometry": False, "max_features": 1}
@@ -186,8 +246,13 @@ def test_read_datetime(test_fgdb_vsi, use_arrow):
  assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"


- def test_read_datetime_tz(test_datetime_tz, tmp_path):
+ @pytest.mark.filterwarnings("ignore: Non-conformant content for record 1 in column ")
+ @pytest.mark.requires_arrow_write_api
+ def test_read_datetime_tz(test_datetime_tz, tmp_path, use_arrow):
  df = read_dataframe(test_datetime_tz)
+ # Make the index non-consecutive to test this case as well. Added for issue
+ # https://github.com/geopandas/pyogrio/issues/324
+ df = df.set_index(np.array([0, 2]))
  raw_expected = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]

  if PANDAS_GE_20:
@@ -195,15 +260,22 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
  else:
  expected = pd.to_datetime(raw_expected)
  expected = pd.Series(expected, name="datetime_col")
- assert_series_equal(df.datetime_col, expected)
+ assert_series_equal(df.datetime_col, expected, check_index=False)
  # test write and read round trips
  fpath = tmp_path / "test.gpkg"
- write_dataframe(df, fpath)
- df_read = read_dataframe(fpath)
+ write_dataframe(df, fpath, use_arrow=use_arrow)
+ df_read = read_dataframe(fpath, use_arrow=use_arrow)
+ if use_arrow:
+ # with Arrow, the datetimes are always read as UTC
+ expected = expected.dt.tz_convert("UTC")
  assert_series_equal(df_read.datetime_col, expected)


- def test_write_datetime_mixed_offset(tmp_path):
+ @pytest.mark.filterwarnings(
+ "ignore: Non-conformant content for record 1 in column dates"
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_write_datetime_mixed_offset(tmp_path, use_arrow):
  # Australian Summer Time AEDT (GMT+11), Standard Time AEST (GMT+10)
  dates = ["2023-01-01 11:00:01.111", "2023-06-01 10:00:01.111"]
  naive_col = pd.Series(pd.to_datetime(dates), name="dates")
@@ -217,14 +289,18 @@ def test_write_datetime_mixed_offset(tmp_path):
  crs="EPSG:4326",
  )
  fpath = tmp_path / "test.gpkg"
- write_dataframe(df, fpath)
- result = read_dataframe(fpath)
+ write_dataframe(df, fpath, use_arrow=use_arrow)
+ result = read_dataframe(fpath, use_arrow=use_arrow)
  # GDAL tz only encodes offsets, not timezones
  # check multiple offsets are read as utc datetime instead of string values
  assert_series_equal(result["dates"], utc_col)


- def test_read_write_datetime_tz_with_nulls(tmp_path):
+ @pytest.mark.filterwarnings(
+ "ignore: Non-conformant content for record 1 in column dates"
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_read_write_datetime_tz_with_nulls(tmp_path, use_arrow):
  dates_raw = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00", pd.NaT]
  if PANDAS_GE_20:
  dates = pd.to_datetime(dates_raw, format="ISO8601").as_unit("ms")
@@ -235,13 +311,18 @@ def test_read_write_datetime_tz_with_nulls(tmp_path):
  crs="EPSG:4326",
  )
  fpath = tmp_path / "test.gpkg"
- write_dataframe(df, fpath)
- result = read_dataframe(fpath)
+ write_dataframe(df, fpath, use_arrow=use_arrow)
+ result = read_dataframe(fpath, use_arrow=use_arrow)
+ if use_arrow:
+ # with Arrow, the datetimes are always read as UTC
+ df["dates"] = df["dates"].dt.tz_convert("UTC")
  assert_geodataframe_equal(df, result)


  def test_read_null_values(test_fgdb_vsi, use_arrow):
- df = read_dataframe(test_fgdb_vsi, use_arrow=use_arrow, read_geometry=False)
+ df = read_dataframe(
+ test_fgdb_vsi, layer="basetable_2", use_arrow=use_arrow, read_geometry=False
+ )

  # make sure that Null values are preserved
  assert df.SEGMENT_NAME.isnull().max()
@@ -331,6 +412,21 @@ def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
  )


+ def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
+ # column included in where is not also included in list of columns, which means
+ # GDAL will return no features
+ # NOTE: this behavior is inconsistent across drivers so only shapefiles are
+ # tested for this
+ df = read_dataframe(
+ naturalearth_lowres,
+ where=""" "iso_a3" = 'CAN' """,
+ columns=["name"],
+ use_arrow=use_arrow,
+ )
+
+ assert len(df) == 0
+
+
  @pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
  def test_read_bbox_invalid(naturalearth_lowres_all_ext, bbox, use_arrow):
  with pytest.raises(ValueError, match="Invalid bbox"):
@@ -349,7 +445,7 @@ def test_read_bbox(naturalearth_lowres_all_ext, use_arrow, bbox, expected):
  if (
  use_arrow
  and __gdal_version__ < (3, 8, 0)
- and os.path.splitext(naturalearth_lowres_all_ext)[1] == ".gpkg"
+ and naturalearth_lowres_all_ext.suffix == ".gpkg"
  ):
  pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")

@@ -438,7 +534,7 @@ def test_read_mask(
  if (
  use_arrow
  and __gdal_version__ < (3, 8, 0)
- and os.path.splitext(naturalearth_lowres_all_ext)[1] == ".gpkg"
+ and naturalearth_lowres_all_ext.suffix == ".gpkg"
  ):
  pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")

@@ -470,14 +566,45 @@ def test_read_mask_where(naturalearth_lowres_all_ext, use_arrow):
  assert np.array_equal(df.iso_a3, ["CAN"])


- def test_read_fids(naturalearth_lowres_all_ext):
+ @pytest.mark.parametrize("fids", [[1, 5, 10], np.array([1, 5, 10], dtype=np.int64)])
+ def test_read_fids(naturalearth_lowres_all_ext, fids, use_arrow):
  # ensure keyword is properly passed through
- fids = np.array([1, 10, 5], dtype=np.int64)
- df = read_dataframe(naturalearth_lowres_all_ext, fids=fids, fid_as_index=True)
+ df = read_dataframe(
+ naturalearth_lowres_all_ext, fids=fids, fid_as_index=True, use_arrow=use_arrow
+ )
  assert len(df) == 3
  assert np.array_equal(fids, df.index.values)


+ @requires_pyarrow_api
+ def test_read_fids_arrow_max_exception(naturalearth_lowres):
+ # Maximum number at time of writing is 4997 for "OGRSQL". For e.g. for SQLite based
+ # formats like Geopackage, there is no limit.
+ nb_fids = 4998
+ fids = range(nb_fids)
+ with pytest.raises(ValueError, match=f"error applying filter for {nb_fids} fids"):
+ _ = read_dataframe(naturalearth_lowres, fids=fids, use_arrow=True)
+
+
+ @requires_pyarrow_api
+ @pytest.mark.skipif(
+ __gdal_version__ >= (3, 8, 0), reason="GDAL >= 3.8.0 does not need to warn"
+ )
+ def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext):
+ # A warning should be given for old GDAL versions, except for some file formats.
+ if naturalearth_lowres_all_ext.suffix not in [".gpkg", ".geojson"]:
+ handler = pytest.warns(
+ UserWarning,
+ match="Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow",
+ )
+ else:
+ handler = contextlib.nullcontext()
+
+ with handler:
+ df = read_dataframe(naturalearth_lowres_all_ext, fids=[22], use_arrow=True)
+ assert len(df) == 1
+
+
  def test_read_fids_force_2d(test_fgdb_vsi):
  with pytest.warns(
  UserWarning, match=r"Measured \(M\) geometry types are not supported"
@@ -573,13 +700,17 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
  # The geometry column cannot be specified when using the
  # default OGRSQL dialect but is returned nonetheless, so 4 columns.
  sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
- df = read_dataframe(naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL")
+ df = read_dataframe(
+ naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+ )
  assert len(df.columns) == 4
  assert len(df) == 177

  # Should return single row
  sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
- df = read_dataframe(naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL")
+ df = read_dataframe(
+ naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+ )
  assert len(df) == 1
  assert len(df.columns) == 6
  assert df.iloc[0].iso_a3 == "CAN"
@@ -587,7 +718,9 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
  sql = """SELECT *
  FROM naturalearth_lowres
  WHERE iso_a3 IN ('CAN', 'USA', 'MEX')"""
- df = read_dataframe(naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL")
+ df = read_dataframe(
+ naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+ )
  assert len(df.columns) == 6
  assert len(df) == 3
  assert df.iso_a3.tolist() == ["CAN", "USA", "MEX"]
@@ -596,7 +729,9 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
  FROM naturalearth_lowres
  WHERE iso_a3 IN ('CAN', 'USA', 'MEX')
  ORDER BY name"""
- df = read_dataframe(naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL")
+ df = read_dataframe(
+ naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+ )
  assert len(df.columns) == 6
  assert len(df) == 3
  assert df.iso_a3.tolist() == ["CAN", "MEX", "USA"]
@@ -605,7 +740,9 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
  sql = """SELECT *
  FROM naturalearth_lowres
  WHERE POP_EST >= 10000000 AND POP_EST < 100000000"""
- df = read_dataframe(naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL")
+ df = read_dataframe(
+ naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+ )
  assert len(df) == 75
  assert len(df.columns) == 6
  assert df.pop_est.min() >= 10000000
@@ -613,25 +750,36 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):

  # Should match no items.
  sql = "SELECT * FROM naturalearth_lowres WHERE ISO_A3 = 'INVALID'"
- df = read_dataframe(naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL")
+ df = read_dataframe(
+ naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+ )
  assert len(df) == 0


- def test_read_sql_invalid(naturalearth_lowres_all_ext):
+ def test_read_sql_invalid(naturalearth_lowres_all_ext, use_arrow):
  if naturalearth_lowres_all_ext.suffix == ".gpkg":
  with pytest.raises(Exception, match="In ExecuteSQL().*"):
- read_dataframe(naturalearth_lowres_all_ext, sql="invalid")
+ read_dataframe(
+ naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
+ )
  else:
  with pytest.raises(Exception, match="SQL Expression Parsing Error"):
- read_dataframe(naturalearth_lowres_all_ext, sql="invalid")
+ read_dataframe(
+ naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
+ )

  with pytest.raises(
  ValueError, match="'sql' paramater cannot be combined with 'layer'"
  ):
- read_dataframe(naturalearth_lowres_all_ext, sql="whatever", layer="invalid")
+ read_dataframe(
+ naturalearth_lowres_all_ext,
+ sql="whatever",
+ layer="invalid",
+ use_arrow=use_arrow,
+ )


- def test_read_sql_columns_where(naturalearth_lowres_all_ext):
+ def test_read_sql_columns_where(naturalearth_lowres_all_ext, use_arrow):
  sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
  df = read_dataframe(
  naturalearth_lowres_all_ext,
@@ -639,13 +787,14 @@ def test_read_sql_columns_where(naturalearth_lowres_all_ext):
  sql_dialect="OGRSQL",
  columns=["iso_a3_renamed", "name"],
  where="iso_a3_renamed IN ('CAN', 'USA', 'MEX')",
+ use_arrow=use_arrow,
  )
  assert len(df.columns) == 3
  assert len(df) == 3
  assert df.iso_a3_renamed.tolist() == ["CAN", "USA", "MEX"]


- def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext):
+ def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext, use_arrow):
  sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
  df = read_dataframe(
  naturalearth_lowres_all_ext,
@@ -654,13 +803,14 @@ def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext):
  sql_dialect="OGRSQL",
  columns=["iso_a3_renamed", "name"],
  where="iso_a3_renamed IN ('CRI', 'PAN')",
  bbox=(-85, 8, -80, 10),
+ use_arrow=use_arrow,
  )
  assert len(df.columns) == 3
  assert len(df) == 2
  assert df.iso_a3_renamed.tolist() == ["PAN", "CRI"]


- def test_read_sql_skip_max(naturalearth_lowres_all_ext):
+ def test_read_sql_skip_max(naturalearth_lowres_all_ext, use_arrow):
  sql = """SELECT *
  FROM naturalearth_lowres
  WHERE iso_a3 IN ('CAN', 'MEX', 'USA')
671
821
  skip_features=1,
672
822
  max_features=1,
673
823
  sql_dialect="OGRSQL",
824
+ use_arrow=use_arrow,
674
825
  )
675
826
  assert len(df.columns) == 6
676
827
  assert len(df) == 1
@@ -678,13 +829,21 @@ def test_read_sql_skip_max(naturalearth_lowres_all_ext):
678
829
 
679
830
  sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
680
831
  df = read_dataframe(
681
- naturalearth_lowres_all_ext, sql=sql, max_features=3, sql_dialect="OGRSQL"
832
+ naturalearth_lowres_all_ext,
833
+ sql=sql,
834
+ max_features=3,
835
+ sql_dialect="OGRSQL",
836
+ use_arrow=use_arrow,
682
837
  )
683
838
  assert len(df) == 1
684
839
 
685
840
  sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
686
841
  df = read_dataframe(
687
- naturalearth_lowres_all_ext, sql=sql, skip_features=1, sql_dialect="OGRSQL"
842
+ naturalearth_lowres_all_ext,
843
+ sql=sql,
844
+ sql_dialect="OGRSQL",
845
+ skip_features=1,
846
+ use_arrow=use_arrow,
688
847
  )
689
848
  assert len(df) == 0
690
849
 
@@ -695,10 +854,12 @@ def test_read_sql_skip_max(naturalearth_lowres_all_ext):
695
854
  [ext for ext in ALL_EXTS if ext != ".gpkg"],
696
855
  indirect=["naturalearth_lowres"],
697
856
  )
698
- def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres):
857
+ def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres, use_arrow):
699
858
  # Should return singular item
700
859
  sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
701
- df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="SQLITE")
860
+ df = read_dataframe(
861
+ naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
862
+ )
702
863
  assert len(df) == 1
703
864
  assert len(df.columns) == 6
704
865
  assert df.iloc[0].iso_a3 == "CAN"
@@ -708,7 +869,9 @@ def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres):
708
869
  sql = """SELECT ST_Buffer(geometry, 5) AS geometry, name, pop_est, iso_a3
709
870
  FROM naturalearth_lowres
710
871
  WHERE ISO_A3 = 'CAN'"""
711
- df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="SQLITE")
872
+ df = read_dataframe(
873
+ naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
874
+ )
712
875
  assert len(df) == 1
713
876
  assert len(df.columns) == 4
714
877
  assert df.iloc[0].geometry.area > area_canada
@@ -718,12 +881,14 @@ def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres):
718
881
  @pytest.mark.parametrize(
719
882
  "naturalearth_lowres", [".gpkg"], indirect=["naturalearth_lowres"]
720
883
  )
721
- def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres):
884
+ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
722
885
  # "INDIRECT_SQL" prohibits GDAL from passing the SQL statement to sqlite.
723
886
  # Because the statement is processed within GDAL it is possible to use
724
887
  # spatialite functions even if sqlite isn't built with spatialite support.
725
888
  sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
726
- df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE")
889
+ df = read_dataframe(
890
+ naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
891
+ )
727
892
  assert len(df) == 1
728
893
  assert len(df.columns) == 6
729
894
  assert df.iloc[0].iso_a3 == "CAN"
@@ -733,29 +898,67 @@ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres):
  sql = """SELECT ST_Buffer(geom, 5) AS geometry, name, pop_est, iso_a3
  FROM naturalearth_lowres
  WHERE ISO_A3 = 'CAN'"""
- df = read_dataframe(naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE")
+ df = read_dataframe(
+ naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
+ )
  assert len(df) == 1
  assert len(df.columns) == 4
  assert df.iloc[0].geometry.area > area_canada


+ @pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
+ def test_write_csv_encoding(tmp_path, encoding):
+ """Test if write_dataframe uses the default encoding correctly."""
+ # Write csv test file. Depending on the os this will be written in a different
+ # encoding: for linux and macos this is utf-8, for windows it is cp1252.
+ csv_path = tmp_path / "test.csv"
+
+ with open(csv_path, "w", encoding=encoding) as csv:
+ csv.write("näme,city\n")
+ csv.write("Wilhelm Röntgen,Zürich\n")
+
+ # Write csv test file with the same data using write_dataframe. It should use the
+ # same encoding as above.
+ df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
+ csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
+ write_dataframe(df, csv_pyogrio_path, encoding=encoding)
+
+ # Check if the text files written both ways can be read again and give same result.
+ with open(csv_path, "r", encoding=encoding) as csv:
+ csv_str = csv.read()
+ with open(csv_pyogrio_path, "r", encoding=encoding) as csv_pyogrio:
+ csv_pyogrio_str = csv_pyogrio.read()
+ assert csv_str == csv_pyogrio_str
+
+ # Check if they files are binary identical, to be 100% sure they were written with
+ # the same encoding.
+ with open(csv_path, "rb") as csv:
+ csv_bytes = csv.read()
+ with open(csv_pyogrio_path, "rb") as csv_pyogrio:
+ csv_pyogrio_bytes = csv_pyogrio.read()
+ assert csv_bytes == csv_pyogrio_bytes
+
+
  @pytest.mark.parametrize("ext", ALL_EXTS)
- def test_write_dataframe(tmp_path, naturalearth_lowres, ext):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
  input_gdf = read_dataframe(naturalearth_lowres)
  output_path = tmp_path / f"test{ext}"

  if ext == ".fgb":
  # For .fgb, spatial_index=False to avoid the rows being reordered
- write_dataframe(input_gdf, output_path, spatial_index=False)
+ write_dataframe(
+ input_gdf, output_path, use_arrow=use_arrow, spatial_index=False
+ )
  else:
- write_dataframe(input_gdf, output_path)
+ write_dataframe(input_gdf, output_path, use_arrow=use_arrow)

  assert output_path.exists()
  result_gdf = read_dataframe(output_path)

  geometry_types = result_gdf.geometry.type.unique()
  if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
- assert geometry_types == ["MultiPolygon"]
+ assert list(geometry_types) == ["MultiPolygon"]
  else:
  assert set(geometry_types) == set(["MultiPolygon", "Polygon"])

@@ -776,14 +979,21 @@ def test_write_dataframe(tmp_path, naturalearth_lowres, ext):


  @pytest.mark.filterwarnings("ignore:.*No SRS set on layer.*")
+ @pytest.mark.parametrize("write_geodf", [True, False])
  @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS + [".xlsx"] if ext != ".fgb"])
- def test_write_dataframe_no_geom(tmp_path, naturalearth_lowres, ext):
- """Test writing a dataframe without a geometry column.
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_no_geom(
+ request, tmp_path, naturalearth_lowres, write_geodf, ext, use_arrow
+ ):
+ """Test writing a (geo)dataframe without a geometry column.

  FlatGeobuf (.fgb) doesn't seem to support this, and just writes an empty file.
  """
  # Prepare test data
  input_df = read_dataframe(naturalearth_lowres, read_geometry=False)
+ if write_geodf:
+ input_df = gp.GeoDataFrame(input_df)
+
  output_path = tmp_path / f"test{ext}"

  # A shapefile without geometry column results in only a .dbf file.
@@ -793,7 +1003,7 @@ def test_write_dataframe_no_geom(tmp_path, naturalearth_lowres, ext):
  # Determine driver
  driver = DRIVERS[ext] if ext != ".xlsx" else "XLSX"

- write_dataframe(input_df, output_path, driver=driver)
+ write_dataframe(input_df, output_path, use_arrow=use_arrow, driver=driver)

  assert output_path.exists()
  result_df = read_dataframe(output_path)
@@ -806,6 +1016,9 @@ def test_write_dataframe_no_geom(tmp_path, naturalearth_lowres, ext):
  if ext in [".gpkg", ".shp", ".xlsx"]:
  # These file types return a DataFrame when read.
  assert not isinstance(result_df, gp.GeoDataFrame)
+ if isinstance(input_df, gp.GeoDataFrame):
+ input_df = pd.DataFrame(input_df)
+
  pd.testing.assert_frame_equal(
  result_df, input_df, check_index_type=False, check_dtype=check_dtype
  )
@@ -822,12 +1035,27 @@ def test_write_dataframe_no_geom(tmp_path, naturalearth_lowres, ext):
  )


+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):
+ # dataframe writing ignores the index
+ input_gdf = read_dataframe(naturalearth_lowres)
+ input_gdf = input_gdf.set_index("iso_a3")
+
+ output_path = tmp_path / "test.shp"
+ write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
+
+ result_gdf = read_dataframe(output_path)
+ assert isinstance(result_gdf.index, pd.RangeIndex)
+ assert_geodataframe_equal(result_gdf, input_gdf.reset_index(drop=True))
+
+
  @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
- def test_write_empty_dataframe(tmp_path, ext):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_empty_dataframe(tmp_path, ext, use_arrow):
  expected = gp.GeoDataFrame(geometry=[], crs=4326)

  filename = tmp_path / f"test{ext}"
- write_dataframe(expected, filename)
+ write_dataframe(expected, filename, use_arrow=use_arrow)

  assert filename.exists()
  df = read_dataframe(filename)
@@ -835,83 +1063,119 @@ def test_write_empty_dataframe(tmp_path, ext):


  @pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
- def test_write_read_empty_dataframe_unsupported(tmp_path, ext):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
  # Writing empty dataframe to .geojsons or .geojsonl results logically in a 0 byte
  # file, but gdal isn't able to read those again at the time of writing.
  # Issue logged here: https://github.com/geopandas/pyogrio/issues/94
  expected = gp.GeoDataFrame(geometry=[], crs=4326)

  filename = tmp_path / f"test{ext}"
- write_dataframe(expected, filename)
+ write_dataframe(expected, filename, use_arrow=use_arrow)

  assert filename.exists()
  with pytest.raises(
- Exception, match=".* not recognized as a supported file format."
+ Exception, match=".* not recognized as( being in)? a supported file format."
  ):
- _ = read_dataframe(filename)
+ _ = read_dataframe(filename, use_arrow=use_arrow)


- def test_write_dataframe_gpkg_multiple_layers(tmp_path, naturalearth_lowres):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_gpkg_multiple_layers(tmp_path, naturalearth_lowres, use_arrow):
  input_gdf = read_dataframe(naturalearth_lowres)
- output_path = tmp_path / "test.gpkg"
+ filename = tmp_path / "test.gpkg"

- write_dataframe(input_gdf, output_path, layer="first", promote_to_multi=True)
+ write_dataframe(
+ input_gdf,
+ filename,
+ layer="first",
+ promote_to_multi=True,
+ use_arrow=use_arrow,
+ )

- assert os.path.exists(output_path)
- assert np.array_equal(list_layers(output_path), [["first", "MultiPolygon"]])
+ assert filename.exists()
+ assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])

- write_dataframe(input_gdf, output_path, layer="second", promote_to_multi=True)
+ write_dataframe(
+ input_gdf,
+ filename,
+ layer="second",
+ promote_to_multi=True,
+ use_arrow=use_arrow,
+ )
  assert np.array_equal(
- list_layers(output_path),
+ list_layers(filename),
  [["first", "MultiPolygon"], ["second", "MultiPolygon"]],
  )


  @pytest.mark.parametrize("ext", ALL_EXTS)
- def test_write_dataframe_append(tmp_path, naturalearth_lowres, ext):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_append(request, tmp_path, naturalearth_lowres, ext, use_arrow):
  if ext == ".fgb" and __gdal_version__ <= (3, 5, 0):
  pytest.skip("Append to FlatGeobuf fails for GDAL <= 3.5.0")

  if ext in (".geojsonl", ".geojsons") and __gdal_version__ <= (3, 6, 0):
  pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")

+ if use_arrow and ext.startswith(".geojson"):
+ # Bug in GDAL when appending int64 to GeoJSON
+ # (https://github.com/OSGeo/gdal/issues/9792)
+ request.node.add_marker(
+ pytest.mark.xfail(reason="Bugs with append when writing Arrow to GeoJSON")
+ )
+
  input_gdf = read_dataframe(naturalearth_lowres)
- output_path = tmp_path / f"test{ext}"
+ filename = tmp_path / f"test{ext}"

- write_dataframe(input_gdf, output_path)
+ write_dataframe(input_gdf, filename, use_arrow=use_arrow)

- assert os.path.exists(output_path)
- assert len(read_dataframe(output_path)) == 177
+ filename.exists()
+ assert len(read_dataframe(filename)) == 177

- write_dataframe(input_gdf, output_path, append=True)
- assert len(read_dataframe(output_path)) == 354
+ write_dataframe(input_gdf, filename, use_arrow=use_arrow, append=True)
+ assert len(read_dataframe(filename)) == 354


  @pytest.mark.parametrize("spatial_index", [False, True])
- def test_write_dataframe_gdal_options(tmp_path, naturalearth_lowres, spatial_index):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_gdal_options(
+ tmp_path, naturalearth_lowres, spatial_index, use_arrow
+ ):
  df = read_dataframe(naturalearth_lowres)

  outfilename1 = tmp_path / "test1.shp"
- write_dataframe(df, outfilename1, SPATIAL_INDEX="YES" if spatial_index else "NO")
+ write_dataframe(
+ df,
+ outfilename1,
+ use_arrow=use_arrow,
+ SPATIAL_INDEX="YES" if spatial_index else "NO",
+ )
  assert outfilename1.exists() is True
  index_filename1 = tmp_path / "test1.qix"
  assert index_filename1.exists() is spatial_index

  # using explicit layer_options instead
  outfilename2 = tmp_path / "test2.shp"
- write_dataframe(df, outfilename2, layer_options=dict(spatial_index=spatial_index))
+ write_dataframe(
+ df,
+ outfilename2,
+ use_arrow=use_arrow,
+ layer_options=dict(spatial_index=spatial_index),
+ )
  assert outfilename2.exists() is True
  index_filename2 = tmp_path / "test2.qix"
  assert index_filename2.exists() is spatial_index


- def test_write_dataframe_gdal_options_unknown(tmp_path, naturalearth_lowres):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_gdal_options_unknown(tmp_path, naturalearth_lowres, use_arrow):
  df = read_dataframe(naturalearth_lowres)

  # geojson has no spatial index, so passing keyword should raise
  outfilename = tmp_path / "test.geojson"
  with pytest.raises(ValueError, match="unrecognized option 'SPATIAL_INDEX'"):
- write_dataframe(df, outfilename, spatial_index=True)
+ write_dataframe(df, outfilename, use_arrow=use_arrow, spatial_index=True)


  def _get_gpkg_table_names(path):
@@ -924,21 +1188,25 @@ def _get_gpkg_table_names(path):
  return [res[0] for res in result]


- def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres, use_arrow):
  df = read_dataframe(naturalearth_lowres)

  test_default_filename = tmp_path / "test_default.gpkg"
- write_dataframe(df, test_default_filename)
+ write_dataframe(df, test_default_filename, use_arrow=use_arrow)
  assert "gpkg_ogr_contents" in _get_gpkg_table_names(test_default_filename)

  test_no_contents_filename = tmp_path / "test_no_contents.gpkg"
- write_dataframe(df, test_default_filename, ADD_GPKG_OGR_CONTENTS="NO")
+ write_dataframe(
+ df, test_default_filename, use_arrow=use_arrow, ADD_GPKG_OGR_CONTENTS="NO"
+ )
  assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename)

  test_no_contents_filename2 = tmp_path / "test_no_contents2.gpkg"
  write_dataframe(
  df,
  test_no_contents_filename2,
+ use_arrow=use_arrow,
  dataset_options=dict(add_gpkg_ogr_contents=False),
  )
  assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename2)
@@ -955,6 +1223,7 @@ def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres):
  (".geojson", False, ["MultiPolygon", "Polygon"], "Unknown"),
  ],
  )
+ @pytest.mark.requires_arrow_write_api
  def test_write_dataframe_promote_to_multi(
  tmp_path,
  naturalearth_lowres,
@@ -962,11 +1231,14 @@ def test_write_dataframe_promote_to_multi(
  promote_to_multi,
  expected_geometry_types,
  expected_geometry_type,
+ use_arrow,
  ):
  input_gdf = read_dataframe(naturalearth_lowres)

  output_path = tmp_path / f"test_promote{ext}"
- write_dataframe(input_gdf, output_path, promote_to_multi=promote_to_multi)
+ write_dataframe(
+ input_gdf, output_path, use_arrow=use_arrow, promote_to_multi=promote_to_multi
+ )

  assert output_path.exists()
  output_gdf = read_dataframe(output_path)
@@ -999,6 +1271,7 @@ def test_write_dataframe_promote_to_multi(
  (".shp", True, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
  ],
  )
+ @pytest.mark.requires_arrow_write_api
  def test_write_dataframe_promote_to_multi_layer_geom_type(
  tmp_path,
  naturalearth_lowres,
@@ -1007,6 +1280,7 @@ def test_write_dataframe_promote_to_multi_layer_geom_type(
  geometry_type,
  expected_geometry_types,
  expected_geometry_type,
+ use_arrow,
  ):
  input_gdf = read_dataframe(naturalearth_lowres)

@@ -1023,6 +1297,7 @@ def test_write_dataframe_promote_to_multi_layer_geom_type(
  write_dataframe(
  input_gdf,
  output_path,
+ use_arrow=use_arrow,
  promote_to_multi=promote_to_multi,
  geometry_type=geometry_type,
  )
@@ -1041,9 +1316,15 @@ def test_write_dataframe_promote_to_multi_layer_geom_type(
  (".fgb", False, "Polygon", "Mismatched geometry type"),
  (".fgb", None, "Point", "Mismatched geometry type"),
  (".fgb", None, "Polygon", "Mismatched geometry type"),
- (".shp", None, "Point", "Could not add feature to layer at index"),
+ (
+ ".shp",
+ None,
+ "Point",
+ "Could not add feature to layer at index|Error while writing batch to OGR layer",
+ ),
  ],
  )
+ @pytest.mark.requires_arrow_write_api
  def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
  tmp_path,
  naturalearth_lowres,
@@ -1051,31 +1332,37 @@ def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
  promote_to_multi,
  geometry_type,
  expected_raises_match,
+ use_arrow,
  ):
  input_gdf = read_dataframe(naturalearth_lowres)

  output_path = tmp_path / f"test{ext}"
- with pytest.raises(FeatureError, match=expected_raises_match):
+ with pytest.raises((FeatureError, DataLayerError), match=expected_raises_match):
  write_dataframe(
  input_gdf,
  output_path,
+ use_arrow=use_arrow,
  promote_to_multi=promote_to_multi,
  geometry_type=geometry_type,
  )


- def test_write_dataframe_layer_geom_type_invalid(tmp_path, naturalearth_lowres):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_layer_geom_type_invalid(
+ tmp_path, naturalearth_lowres, use_arrow
+ ):
  df = read_dataframe(naturalearth_lowres)

  filename = tmp_path / "test.geojson"
  with pytest.raises(
  GeometryError, match="Geometry type is not supported: NotSupported"
  ):
- write_dataframe(df, filename, geometry_type="NotSupported")
+ write_dataframe(df, filename, use_arrow=use_arrow, geometry_type="NotSupported")


  @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".shp"])
- def test_write_dataframe_truly_mixed(tmp_path, ext):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_truly_mixed(tmp_path, ext, use_arrow):
  geometry = [
  shapely.Point(0, 0),
  shapely.LineString([(0, 0), (1, 1)]),
@@ -1095,9 +1382,9 @@ def test_write_dataframe_truly_mixed(tmp_path, ext):

  if ext == ".fgb":
  # For .fgb, spatial_index=False to avoid the rows being reordered
- write_dataframe(df, filename, spatial_index=False)
+ write_dataframe(df, filename, use_arrow=use_arrow, spatial_index=False)
  else:
- write_dataframe(df, filename)
+ write_dataframe(df, filename, use_arrow=use_arrow)

  # Drivers that support mixed geometries will default to "Unknown" geometry type
  assert read_info(filename)["geometry_type"] == "Unknown"
@@ -1105,7 +1392,8 @@ def test_write_dataframe_truly_mixed(tmp_path, ext):
  assert_geodataframe_equal(result, df, check_geom_type=True)


- def test_write_dataframe_truly_mixed_invalid(tmp_path):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_truly_mixed_invalid(tmp_path, use_arrow):
  # Shapefile doesn't support generic "Geometry" / "Unknown" type
  # for mixed geometries

@@ -1123,9 +1411,12 @@ def test_write_dataframe_truly_mixed_invalid(tmp_path):
  msg = (
  "Could not add feature to layer at index 1: Attempt to "
  r"write non-point \(LINESTRING\) geometry to point shapefile."
+ # DataLayerError when using Arrow
+ "|Error while writing batch to OGR layer: Attempt to "
+ r"write non-point \(LINESTRING\) geometry to point shapefile."
  )
- with pytest.raises(FeatureError, match=msg):
- write_dataframe(df, tmp_path / "test.shp")
+ with pytest.raises((FeatureError, DataLayerError), match=msg):
+ write_dataframe(df, tmp_path / "test.shp", use_arrow=use_arrow)


  @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".fgb"])
@@ -1138,11 +1429,12 @@ def test_write_dataframe_truly_mixed_invalid(tmp_path):
  [None, None],
  ],
  )
- def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext, use_arrow):
  filename = tmp_path / f"test{ext}"

  df = gp.GeoDataFrame({"col": [1.0, 2.0]}, geometry=geoms, crs="EPSG:4326")
- write_dataframe(df, filename)
+ write_dataframe(df, filename, use_arrow=use_arrow)
  result = read_dataframe(filename)
  assert_geodataframe_equal(result, df)

@@ -1150,16 +1442,19 @@ def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext):
  @pytest.mark.filterwarnings(
  "ignore: You will likely lose important projection information"
  )
- def test_custom_crs_io(tmpdir, naturalearth_lowres_all_ext):
+ @pytest.mark.requires_arrow_write_api
+ def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
  df = read_dataframe(naturalearth_lowres_all_ext)
  # project Belgium to a custom Albers Equal Area projection
- expected = df.loc[df.name == "Belgium"].to_crs(
- "+proj=aea +lat_1=49.5 +lat_2=51.5 +lon_0=4.3"
+ expected = (
+ df.loc[df.name == "Belgium"]
+ .reset_index(drop=True)
+ .to_crs("+proj=aea +lat_1=49.5 +lat_2=51.5 +lon_0=4.3")
  )
- filename = os.path.join(str(tmpdir), "test.shp")
- write_dataframe(expected, filename)
+ filename = tmp_path / "test.shp"
+ write_dataframe(expected, filename, use_arrow=use_arrow)

- assert os.path.exists(filename)
+ assert filename.exists()

  df = read_dataframe(filename)

@@ -1171,6 +1466,7 @@ def test_custom_crs_io(tmpdir, naturalearth_lowres_all_ext):


  def test_write_read_mixed_column_values(tmp_path):
+ # use_arrow=True is tested separately below
  mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
  geoms = [shapely.Point(0, 0) for _ in mixed_values]
  test_gdf = gp.GeoDataFrame(
@@ -1187,7 +1483,21 @@ def test_write_read_mixed_column_values(tmp_path):
  assert output_gdf["mixed"][idx] == str(value)


- def test_write_read_null(tmp_path):
+ @requires_arrow_write_api
+ def test_write_read_mixed_column_values_arrow(tmp_path):
+ # Arrow cannot represent a column of mixed types
+ mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
+ geoms = [shapely.Point(0, 0) for _ in mixed_values]
+ test_gdf = gp.GeoDataFrame(
+ {"geometry": geoms, "mixed": mixed_values}, crs="epsg:31370"
+ )
+ output_path = tmp_path / "test_write_mixed_column.gpkg"
+ with pytest.raises(TypeError, match=".*Conversion failed for column"):
+ write_dataframe(test_gdf, output_path, use_arrow=True)
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_read_null(tmp_path, use_arrow):
  output_path = tmp_path / "test_write_nan.gpkg"
  geom = shapely.Point(0, 0)
  test_data = {
@@ -1196,7 +1506,7 @@ def test_write_read_null(tmp_path):
  "object_str": ["test", None, np.nan],
  }
  test_gdf = gp.GeoDataFrame(test_data, crs="epsg:31370")
- write_dataframe(test_gdf, output_path)
+ write_dataframe(test_gdf, output_path, use_arrow=use_arrow)
  result_gdf = read_dataframe(output_path)
  assert len(test_gdf) == len(result_gdf)
  assert result_gdf["float64"][0] == 1.0
@@ -1219,7 +1529,7 @@ def test_write_read_null(tmp_path):
  ["2.5D MultiLineString", "MultiLineString Z"],
  ),
  (
- "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))", # NOQA
+ "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))",
  ["2.5D MultiPolygon", "MultiPolygon Z"],
  ),
  (
@@ -1228,11 +1538,12 @@ def test_write_read_null(tmp_path):
  ),
  ],
  )
- def test_write_geometry_z_types(tmp_path, wkt, geom_types):
+ @pytest.mark.requires_arrow_write_api
+ def test_write_geometry_z_types(tmp_path, wkt, geom_types, use_arrow):
  filename = tmp_path / "test.fgb"
  gdf = gp.GeoDataFrame(geometry=from_wkt([wkt]), crs="EPSG:4326")
  for geom_type in geom_types:
- write_dataframe(gdf, filename, geometry_type=geom_type)
+ write_dataframe(gdf, filename, use_arrow=use_arrow, geometry_type=geom_type)
  df = read_dataframe(filename)
  assert_geodataframe_equal(df, gdf)

@@ -1261,7 +1572,7 @@ def test_write_geometry_z_types(tmp_path, wkt, geom_types):
  "MultiPolygon Z",
  False,
  [
- "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))" # noqa: E501
+ "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))"
  ],
  ),
  (
@@ -1286,8 +1597,9 @@ def test_write_geometry_z_types(tmp_path, wkt, geom_types):
  ),
  ],
  )
+ @pytest.mark.requires_arrow_write_api
  def test_write_geometry_z_types_auto(
- tmp_path, ext, test_descr, exp_geometry_type, mixed_dimensions, wkt
+ tmp_path, ext, test_descr, exp_geometry_type, mixed_dimensions, wkt, use_arrow
  ):
  # Shapefile has some different behaviour that other file types
  if ext == ".shp":
@@ -1314,10 +1626,10 @@ def test_write_geometry_z_types_auto(
  DataSourceError,
  match=("Mixed 2D and 3D coordinates are not supported by"),
  ):
- write_dataframe(gdf, filename)
+ write_dataframe(gdf, filename, use_arrow=use_arrow)
  return
  else:
- write_dataframe(gdf, filename)
+ write_dataframe(gdf, filename, use_arrow=use_arrow)

  info = read_info(filename)
  assert info["geometry_type"] == exp_geometry_type
@@ -1329,11 +1641,48 @@ def test_write_geometry_z_types_auto(
1329
1641
  assert_geodataframe_equal(gdf, result_gdf)
1330
1642
 
1331
1643
 
1332
- def test_read_multisurface(data_dir):
1333
- df = read_dataframe(data_dir / "test_multisurface.gpkg")
1644
+ @pytest.mark.parametrize(
1645
+ "on_invalid, message",
1646
+ [
1647
+ (
1648
+ "warn",
1649
+ "Invalid WKB: geometry is returned as None. IllegalArgumentException: "
1650
+ "Invalid number of points in LinearRing found 2 - must be 0 or >=",
1651
+ ),
1652
+ ("raise", "Invalid number of points in LinearRing found 2 - must be 0 or >="),
1653
+ ("ignore", None),
1654
+ ],
1655
+ )
1656
+ def test_read_invalid_shp(data_dir, use_arrow, on_invalid, message):
1657
+ if on_invalid == "raise":
1658
+ handler = pytest.raises(shapely.errors.GEOSException, match=message)
1659
+ elif on_invalid == "warn":
1660
+ handler = pytest.warns(match=message)
1661
+ elif on_invalid == "ignore":
1662
+ handler = contextlib.nullcontext()
1663
+ else:
1664
+ raise ValueError(f"unknown value for on_invalid: {on_invalid}")
1665
+
1666
+ with handler:
1667
+ df = read_dataframe(
1668
+ data_dir / "poly_not_enough_points.shp.zip",
1669
+ use_arrow=use_arrow,
1670
+ on_invalid=on_invalid,
1671
+ )
1672
+ df.geometry.isnull().all()
1673
+
1674
+
1675
+ def test_read_multisurface(data_dir, use_arrow):
1676
+ if use_arrow:
1677
+ with pytest.raises(shapely.errors.GEOSException):
1678
+ # TODO(Arrow)
1679
+ # shapely fails parsing the WKB
1680
+ read_dataframe(data_dir / "test_multisurface.gpkg", use_arrow=True)
1681
+ else:
1682
+ df = read_dataframe(data_dir / "test_multisurface.gpkg")
1334
1683
 
1335
- # MultiSurface should be converted to MultiPolygon
1336
- assert df.geometry.type.tolist() == ["MultiPolygon"]
1684
+ # MultiSurface should be converted to MultiPolygon
1685
+ assert df.geometry.type.tolist() == ["MultiPolygon"]
1337
1686
 
1338
1687
 
1339
1688
  def test_read_dataset_kwargs(data_dir, use_arrow):
@@ -1372,7 +1721,8 @@ def test_read_invalid_dataset_kwargs(naturalearth_lowres, use_arrow):
1372
1721
  read_dataframe(naturalearth_lowres, use_arrow=use_arrow, INVALID="YES")
1373
1722
 
1374
1723
 
1375
- def test_write_nullable_dtypes(tmp_path):
1724
+ @pytest.mark.requires_arrow_write_api
1725
+ def test_write_nullable_dtypes(tmp_path, use_arrow):
1376
1726
  path = tmp_path / "test_nullable_dtypes.gpkg"
1377
1727
  test_data = {
1378
1728
  "col1": pd.Series([1, 2, 3], dtype="int64"),
@@ -1384,7 +1734,7 @@ def test_write_nullable_dtypes(tmp_path):
1384
1734
  input_gdf = gp.GeoDataFrame(
1385
1735
  test_data, geometry=[shapely.Point(0, 0)] * 3, crs="epsg:31370"
1386
1736
  )
1387
- write_dataframe(input_gdf, path)
1737
+ write_dataframe(input_gdf, path, use_arrow=use_arrow)
1388
1738
  output_gdf = read_dataframe(path)
1389
1739
  # We read it back as default (non-nullable) numpy dtypes, so we cast
1390
1740
  # to those for the expected result
@@ -1393,19 +1743,21 @@ def test_write_nullable_dtypes(tmp_path):
1393
1743
  expected["col3"] = expected["col3"].astype("float32")
1394
1744
  expected["col4"] = expected["col4"].astype("float64")
1395
1745
  expected["col5"] = expected["col5"].astype(object)
1746
+ expected.loc[1, "col5"] = None # pandas converts to pd.NA on line above
1396
1747
  assert_geodataframe_equal(output_gdf, expected)
1397
1748
 
1398
1749
 
1399
1750
  @pytest.mark.parametrize(
1400
1751
  "metadata_type", ["dataset_metadata", "layer_metadata", "metadata"]
1401
1752
  )
1402
- def test_metadata_io(tmpdir, naturalearth_lowres, metadata_type):
1753
+ @pytest.mark.requires_arrow_write_api
1754
+ def test_metadata_io(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
1403
1755
  metadata = {"level": metadata_type}
1404
1756
 
1405
1757
  df = read_dataframe(naturalearth_lowres)
1406
1758
 
1407
- filename = os.path.join(str(tmpdir), "test.gpkg")
1408
- write_dataframe(df, filename, **{metadata_type: metadata})
1759
+ filename = tmp_path / "test.gpkg"
1760
+ write_dataframe(df, filename, use_arrow=use_arrow, **{metadata_type: metadata})
1409
1761
 
1410
1762
  metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type
1411
1763
 
@@ -1421,22 +1773,27 @@ def test_metadata_io(tmpdir, naturalearth_lowres, metadata_type):
         {"key": 1},
     ],
 )
-def test_invalid_metadata(tmpdir, naturalearth_lowres, metadata_type, metadata):
+@pytest.mark.requires_arrow_write_api
+def test_invalid_metadata(
+    tmp_path, naturalearth_lowres, metadata_type, metadata, use_arrow
+):
+    df = read_dataframe(naturalearth_lowres)
     with pytest.raises(ValueError, match="must be a string"):
-        filename = os.path.join(str(tmpdir), "test.gpkg")
         write_dataframe(
-            read_dataframe(naturalearth_lowres), filename, **{metadata_type: metadata}
+            df, tmp_path / "test.gpkg", use_arrow=use_arrow, **{metadata_type: metadata}
         )
 
 
 @pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
-def test_metadata_unsupported(tmpdir, naturalearth_lowres, metadata_type):
+@pytest.mark.requires_arrow_write_api
+def test_metadata_unsupported(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
     """metadata is silently ignored"""
 
-    filename = os.path.join(str(tmpdir), "test.geojson")
+    filename = tmp_path / "test.geojson"
     write_dataframe(
         read_dataframe(naturalearth_lowres),
         filename,
+        use_arrow=use_arrow,
         **{metadata_type: {"key": "value"}},
     )
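The metadata keywords exercised above (`metadata`, `dataset_metadata`, `layer_metadata`) are written as GDAL metadata on drivers that support it, such as GPKG, and silently ignored elsewhere (GeoJSON). A minimal round-trip sketch, assuming the metadata written by `write_dataframe` is reported back by `read_info` under the matching `layer_metadata` / `dataset_metadata` keys (the output path and values are illustrative):

    import geopandas as gp
    from shapely.geometry import Point
    from pyogrio import read_info
    from pyogrio.geopandas import write_dataframe

    gdf = gp.GeoDataFrame({"name": ["a"]}, geometry=[Point(0, 0)], crs="EPSG:4326")
    write_dataframe(gdf, "out.gpkg", layer_metadata={"source": "example"})

    # read_info is assumed to report the stored metadata for the layer
    info = read_info("out.gpkg")
    assert info["layer_metadata"] == {"source": "example"}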
 
@@ -1466,3 +1823,352 @@ def test_read_dataframe_arrow_dtypes(tmp_path):
     assert isinstance(result["col"].dtype, pd.ArrowDtype)
     result["col"] = result["col"].astype("float64")
     assert_geodataframe_equal(result, df)
+
+
+@requires_pyarrow_api
+@pytest.mark.skipif(
+    __gdal_version__ < (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
+)
+@pytest.mark.parametrize("ext", ALL_EXTS)
+def test_arrow_bool_roundtrip(tmp_path, ext):
+    filename = tmp_path / f"test{ext}"
+
+    kwargs = {}
+
+    if ext == ".fgb":
+        # For .fgb, spatial_index=False to avoid the rows being reordered
+        kwargs["spatial_index"] = False
+
+    df = gp.GeoDataFrame(
+        {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
+        crs="EPSG:4326",
+    )
+
+    write_dataframe(df, filename, **kwargs)
+    result = read_dataframe(filename, use_arrow=True)
+    # Shapefiles do not support bool columns; these are returned as int32
+    assert_geodataframe_equal(result, df, check_dtype=ext != ".shp")
+
+
+@requires_pyarrow_api
+@pytest.mark.skipif(
+    __gdal_version__ >= (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
+)
+@pytest.mark.parametrize("ext", ALL_EXTS)
+def test_arrow_bool_exception(tmp_path, ext):
+    filename = tmp_path / f"test{ext}"
+
+    df = gp.GeoDataFrame(
+        {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
+        crs="EPSG:4326",
+    )
+
+    write_dataframe(df, filename)
+
+    if ext in {".fgb", ".gpkg"}:
+        # only raise exception for GPKG / FGB
+        with pytest.raises(
+            RuntimeError,
+            match="GDAL < 3.8.3 does not correctly read boolean data values using "
+            "the Arrow API",
+        ):
+            read_dataframe(filename, use_arrow=True)
+
+        # do not raise exception if no bool columns are read
+        read_dataframe(filename, use_arrow=True, columns=[])
+
+    else:
+        _ = read_dataframe(filename, use_arrow=True)
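The two tests above split on GDAL 3.8.3 because older GDAL returns incorrect boolean values through the Arrow stream for GPKG and FlatGeobuf. A minimal sketch of gating `use_arrow` on the runtime GDAL version, following the same tuple comparison used in the tests (the input file name is illustrative):

    from pyogrio import __gdal_version__
    from pyogrio.geopandas import read_dataframe

    # only request the Arrow read path when the bool-value bug is known to be fixed
    use_arrow = __gdal_version__ >= (3, 8, 3)
    df = read_dataframe("data.gpkg", use_arrow=use_arrow)  # "data.gpkg" is illustrative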
+
+
+@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
+@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
+def test_write_memory(naturalearth_lowres, driver):
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO()
+    write_dataframe(df, buffer, driver=driver, layer="test")
+
+    assert len(buffer.getbuffer()) > 0
+
+    actual = read_dataframe(buffer)
+    assert len(actual) == len(df)
+
+    is_json = driver == "GeoJSON"
+
+    assert_geodataframe_equal(
+        actual,
+        df,
+        check_less_precise=is_json,
+        check_index_type=False,
+        check_dtype=not is_json,
+    )
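`test_write_memory` exercises writing to an in-memory buffer, which GDAL backs with its /vsimem/ filesystem (hence the filter on the /vsimem warning). A minimal round-trip sketch based on the calls above; the driver must be given explicitly for in-memory output, and the subsequent tests show that append and pre-filled buffers are rejected:

    import geopandas as gp
    from io import BytesIO
    from shapely.geometry import Point
    from pyogrio.geopandas import read_dataframe, write_dataframe

    gdf = gp.GeoDataFrame({"name": ["a"]}, geometry=[Point(0, 0)], crs="EPSG:4326")

    buffer = BytesIO()
    # driver must be provided when writing to an in-memory file
    write_dataframe(gdf, buffer, driver="GPKG", layer="test")

    assert len(buffer.getbuffer()) > 0
    roundtripped = read_dataframe(buffer)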
+
+
+def test_write_memory_driver_required(naturalearth_lowres):
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO()
+
+    with pytest.raises(
+        ValueError,
+        match="driver must be provided to write to in-memory file",
+    ):
+        write_dataframe(df.head(1), buffer, driver=None, layer="test")
+
+
+@pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
+def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
+    if driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
+        pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")
+
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO()
+
+    with pytest.raises(
+        ValueError, match=f"writing to in-memory file is not supported for {driver}"
+    ):
+        write_dataframe(df, buffer, driver=driver, layer="test")
+
+
+@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
+def test_write_memory_append_unsupported(naturalearth_lowres, driver):
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO()
+
+    with pytest.raises(
+        NotImplementedError, match="append is not supported for in-memory files"
+    ):
+        write_dataframe(df.head(1), buffer, driver=driver, layer="test", append=True)
+
+
+def test_write_memory_existing_unsupported(naturalearth_lowres):
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO(b"0000")
+    with pytest.raises(
+        NotImplementedError,
+        match="writing to existing in-memory object is not supported",
+    ):
+        write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test")
+
+
+@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
+def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
+    """Verify that we can write non-UTF-8 data to the data source.
+
+    IMPORTANT: this may not be valid for the data source and will likely render
+    it unusable in other tools, but it should round-trip successfully unless we
+    disable writing using other encodings.
+
+    NOTE: the FlatGeobuf driver cannot handle non-UTF-8 data in GDAL >= 3.9
+
+    NOTE: pyarrow cannot handle non-UTF-8 characters in this way
+    """
+
+    encoding, text = encoded_text
+    output_path = tmp_path / f"test.{ext}"
+
+    df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
+    write_dataframe(df, output_path, encoding=encoding)
+
+    # cannot open these files without specifying encoding
+    with pytest.raises(UnicodeDecodeError):
+        read_dataframe(output_path)
+
+    # must provide encoding to read these properly
+    actual = read_dataframe(output_path, encoding=encoding)
+    assert actual.columns[0] == text
+    assert actual[text].values[0] == text
+
+
+@requires_pyarrow_api
+@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
+def test_non_utf8_encoding_io_arrow_exception(tmp_path, ext, encoded_text):
+    encoding, text = encoded_text
+    output_path = tmp_path / f"test.{ext}"
+
+    df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
+    write_dataframe(df, output_path, encoding=encoding)
+
+    # cannot open these files without specifying encoding
+    with pytest.raises(UnicodeDecodeError):
+        read_dataframe(output_path)
+
+    with pytest.raises(
+        ValueError, match="non-UTF-8 encoding is not supported for Arrow"
+    ):
+        read_dataframe(output_path, encoding=encoding, use_arrow=True)
+
+
+def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):
+    encoding, text = encoded_text
+
+    output_path = tmp_path / "test.shp"
+
+    df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
+    write_dataframe(df, output_path, encoding=encoding)
+
+    # NOTE: GDAL automatically creates a cpg file with the encoding name, which
+    # means that if we read this without specifying the encoding it uses the
+    # correct one
+    actual = read_dataframe(output_path, use_arrow=use_arrow)
+    assert actual.columns[0] == text
+    assert actual[text].values[0] == text
+
+    # verify that if the cpg file is not present, the user-provided encoding must be used
+    output_path.with_suffix(".cpg").unlink()
+
+    # we will assume ISO-8859-1, which is wrong
+    miscoded = text.encode(encoding).decode("ISO-8859-1")
+
+    if use_arrow:
+        # pyarrow cannot decode the column name with the incorrect encoding
+        with pytest.raises(UnicodeDecodeError):
+            read_dataframe(output_path, use_arrow=True)
+    else:
+        bad = read_dataframe(output_path, use_arrow=False)
+        assert bad.columns[0] == miscoded
+        assert bad[miscoded].values[0] == miscoded
+
+    # if encoding is provided, that should yield correct text
+    actual = read_dataframe(output_path, encoding=encoding, use_arrow=use_arrow)
+    assert actual.columns[0] == text
+    assert actual[text].values[0] == text
+
+    # if the ENCODING open option is provided, that should yield correct text
+    actual = read_dataframe(output_path, use_arrow=use_arrow, ENCODING=encoding)
+    assert actual.columns[0] == text
+    assert actual[text].values[0] == text
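The shapefile test above relies on GDAL writing a .cpg sidecar that records the code page, so a plain read already decodes correctly; once the .cpg is removed, the encoding has to be supplied, either via the `encoding` parameter or the shapefile `ENCODING` open option. A minimal sketch, assuming a CP936-encoded shapefile (the path, column name, and values are illustrative):

    import geopandas as gp
    from shapely.geometry import Point
    from pyogrio.geopandas import read_dataframe, write_dataframe

    gdf = gp.GeoDataFrame({"名字": ["中文"]}, geometry=[Point(0, 0)], crs="EPSG:4326")

    # writes test.shp plus a test.cpg sidecar recording the code page
    write_dataframe(gdf, "test.shp", encoding="CP936")

    correct = read_dataframe("test.shp")                       # uses the .cpg sidecar
    explicit = read_dataframe("test.shp", encoding="CP936")    # works without the sidecar
    via_option = read_dataframe("test.shp", ENCODING="CP936")  # shapefile driver open option

As the collision tests below show, the `encoding` parameter and the `ENCODING` option are mutually exclusive in a single call.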
+
+
+def test_encoding_read_option_collision_shapefile(naturalearth_lowres, use_arrow):
+    """Providing both encoding parameter and ENCODING open option (even if blank) is not allowed"""
+
+    with pytest.raises(
+        ValueError, match='cannot provide both encoding parameter and "ENCODING" option'
+    ):
+        read_dataframe(
+            naturalearth_lowres, encoding="CP936", ENCODING="", use_arrow=use_arrow
+        )
+
+
+def test_encoding_write_layer_option_collision_shapefile(tmp_path, encoded_text):
+    """Providing both encoding parameter and ENCODING layer creation option (even if blank) is not allowed"""
+    encoding, text = encoded_text
+
+    output_path = tmp_path / "test.shp"
+    df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
+
+    with pytest.raises(
+        ValueError,
+        match='cannot provide both encoding parameter and "ENCODING" layer creation option',
+    ):
+        write_dataframe(
+            df, output_path, encoding=encoding, layer_options={"ENCODING": ""}
+        )
+
+
+def test_non_utf8_encoding_shapefile_sql(tmp_path, use_arrow):
+    encoding = "CP936"
+
+    output_path = tmp_path / "test.shp"
+
+    mandarin = "中文"
+    df = gp.GeoDataFrame(
+        {mandarin: mandarin, "geometry": [Point(0, 0)]}, crs="EPSG:4326"
+    )
+    write_dataframe(df, output_path, encoding=encoding)
+
+    actual = read_dataframe(
+        output_path,
+        sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
+        use_arrow=use_arrow,
+    )
+    assert actual.columns[0] == mandarin
+    assert actual[mandarin].values[0] == mandarin
+
+    actual = read_dataframe(
+        output_path,
+        sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
+        encoding=encoding,
+        use_arrow=use_arrow,
+    )
+    assert actual.columns[0] == mandarin
+    assert actual[mandarin].values[0] == mandarin
+
+
+@pytest.mark.requires_arrow_write_api
+def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
+    # confirm KML coordinates are written in lon, lat order even if the CRS axis
+    # order specifies otherwise
+    points = [Point(10, 20), Point(30, 40), Point(50, 60)]
+    gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
+    output_path = tmp_path / "test.kml"
+    write_dataframe(
+        gdf, output_path, layer="tmp_layer", driver="KML", use_arrow=use_arrow
+    )
+
+    gdf_in = read_dataframe(output_path, use_arrow=use_arrow)
+
+    assert np.array_equal(gdf_in.geometry.values, points)
+
+    if "LIBKML" in list_drivers():
+        # test appending to the existing file only if LIBKML is available,
+        # as GDAL appears to fall back on the LIBKML driver when appending
+        points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
+        gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
+
+        write_dataframe(
+            gdf_append,
+            output_path,
+            layer="tmp_layer",
+            driver="KML",
+            use_arrow=use_arrow,
+            append=True,
+        )
+        # force_2d is used so only the xy geometry is compared, as a z dimension
+        # is undesirably introduced when the kml file is overwritten
+        gdf_in_appended = read_dataframe(
+            output_path, use_arrow=use_arrow, force_2d=True
+        )
+
+        assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)
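The append branch above only runs when the LIBKML driver is present, since appending to KML appears to fall back on LIBKML. A small sketch of that availability check, assuming `list_drivers()` returns a mapping of driver names to their read/write capabilities:

    from pyogrio import list_drivers

    drivers = list_drivers()  # e.g. {"GPKG": "rw", "KML": "rw", ...}
    kml_append_possible = "LIBKML" in drivers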
+
+
+@pytest.mark.requires_arrow_write_api
+def test_write_geojson_rfc7946_coordinates(tmp_path, use_arrow):
+    points = [Point(10, 20), Point(30, 40), Point(50, 60)]
+    gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
+    output_path = tmp_path / "test.geojson"
+    write_dataframe(
+        gdf,
+        output_path,
+        layer="tmp_layer",
+        driver="GeoJSON",
+        RFC7946=True,
+        use_arrow=use_arrow,
+    )
+
+    gdf_in = read_dataframe(output_path, use_arrow=use_arrow)
+
+    assert np.array_equal(gdf_in.geometry.values, points)
+
+    # test appending to the existing file
+
+    points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
+    gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
+
+    write_dataframe(
+        gdf_append,
+        output_path,
+        layer="tmp_layer",
+        driver="GeoJSON",
+        RFC7946=True,
+        use_arrow=use_arrow,
+        append=True,
+    )
+
+    gdf_in_appended = read_dataframe(output_path, use_arrow=use_arrow)
+    assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)
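`RFC7946=True` above is not a pyogrio parameter: extra keywords to `write_dataframe` are passed through to GDAL as driver-specific creation options, here asking the GeoJSON driver to normalize output per RFC 7946. A minimal sketch of the same pass-through (the output path and data are illustrative):

    import geopandas as gp
    from shapely.geometry import Point
    from pyogrio.geopandas import read_dataframe, write_dataframe

    gdf = gp.GeoDataFrame(geometry=[Point(10, 20)], crs="EPSG:4326")

    # RFC7946 is a GeoJSON driver creation option, forwarded through **kwargs
    write_dataframe(gdf, "out.geojson", driver="GeoJSON", RFC7946=True)
    result = read_dataframe("out.geojson")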