pyogrio-0.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pyogrio might be problematic.
- pyogrio/__init__.py +55 -0
- pyogrio/_compat.py +47 -0
- pyogrio/_env.py +59 -0
- pyogrio/_err.cpython-313-x86_64-linux-gnu.so +0 -0
- pyogrio/_geometry.cpython-313-x86_64-linux-gnu.so +0 -0
- pyogrio/_io.cpython-313-x86_64-linux-gnu.so +0 -0
- pyogrio/_ogr.cpython-313-x86_64-linux-gnu.so +0 -0
- pyogrio/_version.py +21 -0
- pyogrio/_vsi.cpython-313-x86_64-linux-gnu.so +0 -0
- pyogrio/core.py +386 -0
- pyogrio/errors.py +25 -0
- pyogrio/gdal_data/GDAL-targets-release.cmake +19 -0
- pyogrio/gdal_data/GDAL-targets.cmake +105 -0
- pyogrio/gdal_data/GDALConfig.cmake +24 -0
- pyogrio/gdal_data/GDALConfigVersion.cmake +85 -0
- pyogrio/gdal_data/GDALLogoBW.svg +138 -0
- pyogrio/gdal_data/GDALLogoColor.svg +126 -0
- pyogrio/gdal_data/GDALLogoGS.svg +126 -0
- pyogrio/gdal_data/LICENSE.TXT +467 -0
- pyogrio/gdal_data/MM_m_idofic.csv +321 -0
- pyogrio/gdal_data/copyright +467 -0
- pyogrio/gdal_data/cubewerx_extra.wkt +48 -0
- pyogrio/gdal_data/default.rsc +0 -0
- pyogrio/gdal_data/ecw_cs.wkt +1453 -0
- pyogrio/gdal_data/eedaconf.json +23 -0
- pyogrio/gdal_data/epsg.wkt +1 -0
- pyogrio/gdal_data/esri_StatePlane_extra.wkt +631 -0
- pyogrio/gdal_data/gdalicon.png +0 -0
- pyogrio/gdal_data/gdalinfo_output.schema.json +346 -0
- pyogrio/gdal_data/gdalmdiminfo_output.schema.json +321 -0
- pyogrio/gdal_data/gdaltileindex.xsd +269 -0
- pyogrio/gdal_data/gdalvrt.xsd +880 -0
- pyogrio/gdal_data/gfs.xsd +246 -0
- pyogrio/gdal_data/gml_registry.xml +117 -0
- pyogrio/gdal_data/gml_registry.xsd +66 -0
- pyogrio/gdal_data/grib2_center.csv +251 -0
- pyogrio/gdal_data/grib2_process.csv +102 -0
- pyogrio/gdal_data/grib2_subcenter.csv +63 -0
- pyogrio/gdal_data/grib2_table_4_2_0_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_13.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_14.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_15.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_16.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_17.csv +11 -0
- pyogrio/gdal_data/grib2_table_4_2_0_18.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_19.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_190.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_191.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_20.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_21.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_3.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_4.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_5.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_6.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_7.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_191.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_3.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_4.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_1_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_1_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_1_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_20_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_20_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_20_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_3.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_4.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_5.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_6.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_3_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_3_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_3_2.csv +28 -0
- pyogrio/gdal_data/grib2_table_4_2_3_3.csv +8 -0
- pyogrio/gdal_data/grib2_table_4_2_3_4.csv +14 -0
- pyogrio/gdal_data/grib2_table_4_2_3_5.csv +11 -0
- pyogrio/gdal_data/grib2_table_4_2_3_6.csv +11 -0
- pyogrio/gdal_data/grib2_table_4_2_4_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_10.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_3.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_4.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_5.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_6.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_7.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_8.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_9.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_local_Canada.csv +5 -0
- pyogrio/gdal_data/grib2_table_4_2_local_HPC.csv +2 -0
- pyogrio/gdal_data/grib2_table_4_2_local_MRMS.csv +175 -0
- pyogrio/gdal_data/grib2_table_4_2_local_NCEP.csv +401 -0
- pyogrio/gdal_data/grib2_table_4_2_local_NDFD.csv +38 -0
- pyogrio/gdal_data/grib2_table_4_2_local_index.csv +7 -0
- pyogrio/gdal_data/grib2_table_4_5.csv +261 -0
- pyogrio/gdal_data/grib2_table_versions.csv +3 -0
- pyogrio/gdal_data/gt_datum.csv +229 -0
- pyogrio/gdal_data/gt_ellips.csv +24 -0
- pyogrio/gdal_data/header.dxf +1124 -0
- pyogrio/gdal_data/inspire_cp_BasicPropertyUnit.gfs +57 -0
- pyogrio/gdal_data/inspire_cp_CadastralBoundary.gfs +60 -0
- pyogrio/gdal_data/inspire_cp_CadastralParcel.gfs +81 -0
- pyogrio/gdal_data/inspire_cp_CadastralZoning.gfs +161 -0
- pyogrio/gdal_data/jpfgdgml_AdmArea.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_AdmBdry.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_AdmPt.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_BldA.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_BldL.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_Cntr.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_CommBdry.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_CommPt.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_Cstline.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_ElevPt.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_GCP.gfs +94 -0
- pyogrio/gdal_data/jpfgdgml_LeveeEdge.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_RailCL.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_RdASL.gfs +44 -0
- pyogrio/gdal_data/jpfgdgml_RdArea.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_RdCompt.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_RdEdg.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_RdMgtBdry.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_RdSgmtA.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_RvrMgtBdry.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_SBAPt.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_SBArea.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_SBBdry.gfs +44 -0
- pyogrio/gdal_data/jpfgdgml_WA.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_WL.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_WStrA.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_WStrL.gfs +54 -0
- pyogrio/gdal_data/nitf_spec.xml +3306 -0
- pyogrio/gdal_data/nitf_spec.xsd +189 -0
- pyogrio/gdal_data/ogrinfo_output.schema.json +528 -0
- pyogrio/gdal_data/ogrvrt.xsd +546 -0
- pyogrio/gdal_data/osmconf.ini +132 -0
- pyogrio/gdal_data/ozi_datum.csv +131 -0
- pyogrio/gdal_data/ozi_ellips.csv +35 -0
- pyogrio/gdal_data/pci_datum.txt +530 -0
- pyogrio/gdal_data/pci_ellips.txt +129 -0
- pyogrio/gdal_data/pdfcomposition.xsd +721 -0
- pyogrio/gdal_data/pds4_template.xml +65 -0
- pyogrio/gdal_data/plscenesconf.json +1985 -0
- pyogrio/gdal_data/ruian_vf_ob_v1.gfs +1455 -0
- pyogrio/gdal_data/ruian_vf_st_uvoh_v1.gfs +86 -0
- pyogrio/gdal_data/ruian_vf_st_v1.gfs +1489 -0
- pyogrio/gdal_data/ruian_vf_v1.gfs +2126 -0
- pyogrio/gdal_data/s57agencies.csv +249 -0
- pyogrio/gdal_data/s57attributes.csv +484 -0
- pyogrio/gdal_data/s57expectedinput.csv +1008 -0
- pyogrio/gdal_data/s57objectclasses.csv +287 -0
- pyogrio/gdal_data/seed_2d.dgn +0 -0
- pyogrio/gdal_data/seed_3d.dgn +0 -0
- pyogrio/gdal_data/stateplane.csv +259 -0
- pyogrio/gdal_data/tms_LINZAntarticaMapTileGrid.json +190 -0
- pyogrio/gdal_data/tms_MapML_APSTILE.json +268 -0
- pyogrio/gdal_data/tms_MapML_CBMTILE.json +346 -0
- pyogrio/gdal_data/tms_NZTM2000.json +243 -0
- pyogrio/gdal_data/trailer.dxf +434 -0
- pyogrio/gdal_data/usage +4 -0
- pyogrio/gdal_data/vcpkg-cmake-wrapper.cmake +23 -0
- pyogrio/gdal_data/vcpkg.spdx.json +264 -0
- pyogrio/gdal_data/vcpkg_abi_info.txt +41 -0
- pyogrio/gdal_data/vdv452.xml +367 -0
- pyogrio/gdal_data/vdv452.xsd +63 -0
- pyogrio/gdal_data/vicar.json +164 -0
- pyogrio/geopandas.py +683 -0
- pyogrio/proj_data/CH +22 -0
- pyogrio/proj_data/GL27 +23 -0
- pyogrio/proj_data/ITRF2000 +24 -0
- pyogrio/proj_data/ITRF2008 +94 -0
- pyogrio/proj_data/ITRF2014 +55 -0
- pyogrio/proj_data/copyright +34 -0
- pyogrio/proj_data/deformation_model.schema.json +582 -0
- pyogrio/proj_data/nad.lst +142 -0
- pyogrio/proj_data/nad27 +810 -0
- pyogrio/proj_data/nad83 +745 -0
- pyogrio/proj_data/other.extra +53 -0
- pyogrio/proj_data/proj-config-version.cmake +44 -0
- pyogrio/proj_data/proj-config.cmake +79 -0
- pyogrio/proj_data/proj-targets-release.cmake +19 -0
- pyogrio/proj_data/proj-targets.cmake +107 -0
- pyogrio/proj_data/proj.db +0 -0
- pyogrio/proj_data/proj.ini +51 -0
- pyogrio/proj_data/proj4-targets-release.cmake +19 -0
- pyogrio/proj_data/proj4-targets.cmake +107 -0
- pyogrio/proj_data/projjson.schema.json +1174 -0
- pyogrio/proj_data/triangulation.schema.json +214 -0
- pyogrio/proj_data/usage +4 -0
- pyogrio/proj_data/vcpkg.spdx.json +198 -0
- pyogrio/proj_data/vcpkg_abi_info.txt +27 -0
- pyogrio/proj_data/world +214 -0
- pyogrio/raw.py +887 -0
- pyogrio/tests/__init__.py +0 -0
- pyogrio/tests/conftest.py +398 -0
- pyogrio/tests/fixtures/README.md +108 -0
- pyogrio/tests/fixtures/curve.gpkg +0 -0
- pyogrio/tests/fixtures/curvepolygon.gpkg +0 -0
- pyogrio/tests/fixtures/line_zm.gpkg +0 -0
- pyogrio/tests/fixtures/multisurface.gpkg +0 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.cpg +1 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf +0 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.prj +1 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp +0 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx +0 -0
- pyogrio/tests/fixtures/sample.osm.pbf +0 -0
- pyogrio/tests/fixtures/test_gpkg_nulls.gpkg +0 -0
- pyogrio/tests/test_arrow.py +1195 -0
- pyogrio/tests/test_core.py +678 -0
- pyogrio/tests/test_geopandas_io.py +2314 -0
- pyogrio/tests/test_path.py +364 -0
- pyogrio/tests/test_raw_io.py +1515 -0
- pyogrio/tests/test_util.py +56 -0
- pyogrio/util.py +247 -0
- pyogrio-0.10.0.dist-info/LICENSE +21 -0
- pyogrio-0.10.0.dist-info/METADATA +129 -0
- pyogrio-0.10.0.dist-info/RECORD +223 -0
- pyogrio-0.10.0.dist-info/WHEEL +6 -0
- pyogrio-0.10.0.dist-info/top_level.txt +1 -0
- pyogrio.libs/libgdal-2d56d59d.so.35.3.9.1 +0 -0
pyogrio/tests/test_geopandas_io.py
@@ -0,0 +1,2314 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
import locale
|
|
3
|
+
import warnings
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from io import BytesIO
|
|
6
|
+
from zipfile import ZipFile
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
from pyogrio import (
|
|
11
|
+
__gdal_version__,
|
|
12
|
+
list_drivers,
|
|
13
|
+
list_layers,
|
|
14
|
+
read_info,
|
|
15
|
+
vsi_listtree,
|
|
16
|
+
vsi_unlink,
|
|
17
|
+
)
|
|
18
|
+
from pyogrio._compat import HAS_ARROW_WRITE_API, HAS_PYPROJ, PANDAS_GE_15
|
|
19
|
+
from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
|
|
20
|
+
from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe
|
|
21
|
+
from pyogrio.raw import (
|
|
22
|
+
DRIVERS_NO_MIXED_DIMENSIONS,
|
|
23
|
+
DRIVERS_NO_MIXED_SINGLE_MULTI,
|
|
24
|
+
)
|
|
25
|
+
from pyogrio.tests.conftest import (
|
|
26
|
+
ALL_EXTS,
|
|
27
|
+
DRIVERS,
|
|
28
|
+
START_FID,
|
|
29
|
+
requires_arrow_write_api,
|
|
30
|
+
requires_gdal_geos,
|
|
31
|
+
requires_pyarrow_api,
|
|
32
|
+
requires_pyproj,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
import pytest
|
|
36
|
+
|
|
37
|
+
try:
|
|
38
|
+
import geopandas as gp
|
|
39
|
+
import pandas as pd
|
|
40
|
+
from geopandas.array import from_wkt
|
|
41
|
+
|
|
42
|
+
import shapely # if geopandas is present, shapely is expected to be present
|
|
43
|
+
from shapely.geometry import Point
|
|
44
|
+
|
|
45
|
+
from geopandas.testing import assert_geodataframe_equal
|
|
46
|
+
from pandas.testing import (
|
|
47
|
+
assert_index_equal,
|
|
48
|
+
assert_series_equal,
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
except ImportError:
|
|
52
|
+
pass
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
pytest.importorskip("geopandas")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@pytest.fixture(
|
|
59
|
+
scope="session",
|
|
60
|
+
params=[
|
|
61
|
+
False,
|
|
62
|
+
pytest.param(True, marks=requires_pyarrow_api),
|
|
63
|
+
],
|
|
64
|
+
)
|
|
65
|
+
def use_arrow(request):
|
|
66
|
+
return request.param
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@pytest.fixture(autouse=True)
|
|
70
|
+
def skip_if_no_arrow_write_api(request):
|
|
71
|
+
# automatically skip tests with use_arrow=True and that require Arrow write
|
|
72
|
+
# API (marked with `@pytest.mark.requires_arrow_write_api`) if it is not available
|
|
73
|
+
use_arrow = (
|
|
74
|
+
request.getfixturevalue("use_arrow")
|
|
75
|
+
if "use_arrow" in request.fixturenames
|
|
76
|
+
else False
|
|
77
|
+
)
|
|
78
|
+
if (
|
|
79
|
+
use_arrow
|
|
80
|
+
and not HAS_ARROW_WRITE_API
|
|
81
|
+
and request.node.get_closest_marker("requires_arrow_write_api")
|
|
82
|
+
):
|
|
83
|
+
pytest.skip("GDAL>=3.8 required for Arrow write API")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def spatialite_available(path):
|
|
87
|
+
try:
|
|
88
|
+
_ = read_dataframe(
|
|
89
|
+
path, sql="select spatialite_version();", sql_dialect="SQLITE"
|
|
90
|
+
)
|
|
91
|
+
return True
|
|
92
|
+
except Exception:
|
|
93
|
+
return False
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
|
|
97
|
+
def test_read_csv_encoding(tmp_path, encoding):
|
|
98
|
+
# Write csv test file. Depending on the os this will be written in a different
|
|
99
|
+
# encoding: for linux and macos this is utf-8, for windows it is cp1252.
|
|
100
|
+
csv_path = tmp_path / "test.csv"
|
|
101
|
+
with open(csv_path, "w", encoding=encoding) as csv:
|
|
102
|
+
csv.write("näme,city\n")
|
|
103
|
+
csv.write("Wilhelm Röntgen,Zürich\n")
|
|
104
|
+
|
|
105
|
+
# Read csv. The data should be read with the same default encoding as the csv file
|
|
106
|
+
# was written in, but should have been converted to utf-8 in the dataframe returned.
|
|
107
|
+
# Hence, the asserts below, with strings in utf-8, be OK.
|
|
108
|
+
df = read_dataframe(csv_path, encoding=encoding)
|
|
109
|
+
|
|
110
|
+
assert len(df) == 1
|
|
111
|
+
assert df.columns.tolist() == ["näme", "city"]
|
|
112
|
+
assert df.city.tolist() == ["Zürich"]
|
|
113
|
+
assert df.näme.tolist() == ["Wilhelm Röntgen"]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@pytest.mark.skipif(
|
|
117
|
+
locale.getpreferredencoding().upper() == "UTF-8",
|
|
118
|
+
reason="test requires non-UTF-8 default platform",
|
|
119
|
+
)
|
|
120
|
+
def test_read_csv_platform_encoding(tmp_path):
|
|
121
|
+
"""verify that read defaults to platform encoding; only works on Windows (CP1252)"""
|
|
122
|
+
csv_path = tmp_path / "test.csv"
|
|
123
|
+
with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
|
|
124
|
+
csv.write("näme,city\n")
|
|
125
|
+
csv.write("Wilhelm Röntgen,Zürich\n")
|
|
126
|
+
|
|
127
|
+
df = read_dataframe(csv_path)
|
|
128
|
+
|
|
129
|
+
assert len(df) == 1
|
|
130
|
+
assert df.columns.tolist() == ["näme", "city"]
|
|
131
|
+
assert df.city.tolist() == ["Zürich"]
|
|
132
|
+
assert df.näme.tolist() == ["Wilhelm Röntgen"]
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def test_read_dataframe(naturalearth_lowres_all_ext):
|
|
136
|
+
df = read_dataframe(naturalearth_lowres_all_ext)
|
|
137
|
+
|
|
138
|
+
if HAS_PYPROJ:
|
|
139
|
+
assert df.crs == "EPSG:4326"
|
|
140
|
+
assert len(df) == 177
|
|
141
|
+
assert df.columns.tolist() == [
|
|
142
|
+
"pop_est",
|
|
143
|
+
"continent",
|
|
144
|
+
"name",
|
|
145
|
+
"iso_a3",
|
|
146
|
+
"gdp_md_est",
|
|
147
|
+
"geometry",
|
|
148
|
+
]
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def test_read_dataframe_vsi(naturalearth_lowres_vsi, use_arrow):
|
|
152
|
+
df = read_dataframe(naturalearth_lowres_vsi[1], use_arrow=use_arrow)
|
|
153
|
+
assert len(df) == 177
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@pytest.mark.parametrize(
|
|
157
|
+
"columns, fid_as_index, exp_len", [(None, False, 3), ([], True, 3), ([], False, 0)]
|
|
158
|
+
)
|
|
159
|
+
def test_read_layer_without_geometry(
|
|
160
|
+
no_geometry_file, columns, fid_as_index, use_arrow, exp_len
|
|
161
|
+
):
|
|
162
|
+
result = read_dataframe(
|
|
163
|
+
no_geometry_file,
|
|
164
|
+
columns=columns,
|
|
165
|
+
fid_as_index=fid_as_index,
|
|
166
|
+
use_arrow=use_arrow,
|
|
167
|
+
)
|
|
168
|
+
assert type(result) is pd.DataFrame
|
|
169
|
+
assert len(result) == exp_len
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@pytest.mark.parametrize(
|
|
173
|
+
"naturalearth_lowres, expected_ext",
|
|
174
|
+
[(".gpkg", ".gpkg"), (".shp", ".shp")],
|
|
175
|
+
indirect=["naturalearth_lowres"],
|
|
176
|
+
)
|
|
177
|
+
def test_fixture_naturalearth_lowres(naturalearth_lowres, expected_ext):
|
|
178
|
+
# Test the fixture with "indirect" parameter
|
|
179
|
+
assert naturalearth_lowres.suffix == expected_ext
|
|
180
|
+
df = read_dataframe(naturalearth_lowres)
|
|
181
|
+
assert len(df) == 177
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def test_read_no_geometry(naturalearth_lowres_all_ext, use_arrow):
|
|
185
|
+
df = read_dataframe(
|
|
186
|
+
naturalearth_lowres_all_ext, use_arrow=use_arrow, read_geometry=False
|
|
187
|
+
)
|
|
188
|
+
assert isinstance(df, pd.DataFrame)
|
|
189
|
+
assert not isinstance(df, gp.GeoDataFrame)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres, use_arrow):
|
|
193
|
+
with pytest.raises(
|
|
194
|
+
ValueError,
|
|
195
|
+
match=(
|
|
196
|
+
"at least one of read_geometry or return_fids must be True or columns must "
|
|
197
|
+
"be None or non-empty"
|
|
198
|
+
),
|
|
199
|
+
):
|
|
200
|
+
_ = read_dataframe(
|
|
201
|
+
naturalearth_lowres,
|
|
202
|
+
columns=[],
|
|
203
|
+
read_geometry=False,
|
|
204
|
+
fid_as_index=False,
|
|
205
|
+
use_arrow=use_arrow,
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def test_read_force_2d(tmp_path, use_arrow):
|
|
210
|
+
filename = tmp_path / "test.gpkg"
|
|
211
|
+
|
|
212
|
+
# create a GPKG with 3D point values
|
|
213
|
+
expected = gp.GeoDataFrame(
|
|
214
|
+
geometry=[Point(0, 0, 0), Point(1, 1, 0)], crs="EPSG:4326"
|
|
215
|
+
)
|
|
216
|
+
write_dataframe(expected, filename)
|
|
217
|
+
|
|
218
|
+
df = read_dataframe(filename)
|
|
219
|
+
assert df.iloc[0].geometry.has_z
|
|
220
|
+
|
|
221
|
+
df = read_dataframe(
|
|
222
|
+
filename,
|
|
223
|
+
force_2d=True,
|
|
224
|
+
max_features=1,
|
|
225
|
+
use_arrow=use_arrow,
|
|
226
|
+
)
|
|
227
|
+
assert not df.iloc[0].geometry.has_z
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def test_read_layer(tmp_path, use_arrow):
|
|
231
|
+
filename = tmp_path / "test.gpkg"
|
|
232
|
+
|
|
233
|
+
# create a multilayer GPKG
|
|
234
|
+
expected1 = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
|
|
235
|
+
write_dataframe(
|
|
236
|
+
expected1,
|
|
237
|
+
filename,
|
|
238
|
+
layer="layer1",
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
expected2 = gp.GeoDataFrame(geometry=[Point(1, 1)], crs="EPSG:4326")
|
|
242
|
+
write_dataframe(expected2, filename, layer="layer2", append=True)
|
|
243
|
+
|
|
244
|
+
assert np.array_equal(
|
|
245
|
+
list_layers(filename), [["layer1", "Point"], ["layer2", "Point"]]
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
kwargs = {"use_arrow": use_arrow, "max_features": 1}
|
|
249
|
+
|
|
250
|
+
# The first layer is read by default, which will warn when there are multiple
|
|
251
|
+
# layers
|
|
252
|
+
with pytest.warns(UserWarning, match="More than one layer found"):
|
|
253
|
+
df = read_dataframe(filename, **kwargs)
|
|
254
|
+
|
|
255
|
+
assert_geodataframe_equal(df, expected1)
|
|
256
|
+
|
|
257
|
+
# Reading a specific layer by name should return that layer.
|
|
258
|
+
# Detected here by a known column.
|
|
259
|
+
df = read_dataframe(filename, layer="layer2", **kwargs)
|
|
260
|
+
assert_geodataframe_equal(df, expected2)
|
|
261
|
+
|
|
262
|
+
# Reading a specific layer by index should return that layer
|
|
263
|
+
df = read_dataframe(filename, layer=1, **kwargs)
|
|
264
|
+
assert_geodataframe_equal(df, expected2)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def test_read_layer_invalid(naturalearth_lowres_all_ext, use_arrow):
|
|
268
|
+
with pytest.raises(DataLayerError, match="Layer 'wrong' could not be opened"):
|
|
269
|
+
read_dataframe(naturalearth_lowres_all_ext, layer="wrong", use_arrow=use_arrow)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def test_read_datetime(datetime_file, use_arrow):
|
|
273
|
+
df = read_dataframe(datetime_file, use_arrow=use_arrow)
|
|
274
|
+
if PANDAS_GE_20:
|
|
275
|
+
# starting with pandas 2.0, it preserves the passed datetime resolution
|
|
276
|
+
assert df.col.dtype.name == "datetime64[ms]"
|
|
277
|
+
else:
|
|
278
|
+
assert df.col.dtype.name == "datetime64[ns]"
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
@pytest.mark.filterwarnings("ignore: Non-conformant content for record 1 in column ")
|
|
282
|
+
@pytest.mark.requires_arrow_write_api
|
|
283
|
+
def test_read_datetime_tz(datetime_tz_file, tmp_path, use_arrow):
|
|
284
|
+
df = read_dataframe(datetime_tz_file)
|
|
285
|
+
# Make the index non-consecutive to test this case as well. Added for issue
|
|
286
|
+
# https://github.com/geopandas/pyogrio/issues/324
|
|
287
|
+
df = df.set_index(np.array([0, 2]))
|
|
288
|
+
raw_expected = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
|
|
289
|
+
|
|
290
|
+
if PANDAS_GE_20:
|
|
291
|
+
expected = pd.to_datetime(raw_expected, format="ISO8601").as_unit("ms")
|
|
292
|
+
else:
|
|
293
|
+
expected = pd.to_datetime(raw_expected)
|
|
294
|
+
expected = pd.Series(expected, name="datetime_col")
|
|
295
|
+
assert_series_equal(df.datetime_col, expected, check_index=False)
|
|
296
|
+
# test write and read round trips
|
|
297
|
+
fpath = tmp_path / "test.gpkg"
|
|
298
|
+
write_dataframe(df, fpath, use_arrow=use_arrow)
|
|
299
|
+
df_read = read_dataframe(fpath, use_arrow=use_arrow)
|
|
300
|
+
if use_arrow:
|
|
301
|
+
# with Arrow, the datetimes are always read as UTC
|
|
302
|
+
expected = expected.dt.tz_convert("UTC")
|
|
303
|
+
assert_series_equal(df_read.datetime_col, expected)
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
@pytest.mark.filterwarnings(
|
|
307
|
+
"ignore: Non-conformant content for record 1 in column dates"
|
|
308
|
+
)
|
|
309
|
+
@pytest.mark.requires_arrow_write_api
|
|
310
|
+
def test_write_datetime_mixed_offset(tmp_path, use_arrow):
|
|
311
|
+
# Australian Summer Time AEDT (GMT+11), Standard Time AEST (GMT+10)
|
|
312
|
+
dates = ["2023-01-01 11:00:01.111", "2023-06-01 10:00:01.111"]
|
|
313
|
+
naive_col = pd.Series(pd.to_datetime(dates), name="dates")
|
|
314
|
+
localised_col = naive_col.dt.tz_localize("Australia/Sydney")
|
|
315
|
+
utc_col = localised_col.dt.tz_convert("UTC")
|
|
316
|
+
if PANDAS_GE_20:
|
|
317
|
+
utc_col = utc_col.dt.as_unit("ms")
|
|
318
|
+
|
|
319
|
+
df = gp.GeoDataFrame(
|
|
320
|
+
{"dates": localised_col, "geometry": [Point(1, 1), Point(1, 1)]},
|
|
321
|
+
crs="EPSG:4326",
|
|
322
|
+
)
|
|
323
|
+
fpath = tmp_path / "test.gpkg"
|
|
324
|
+
write_dataframe(df, fpath, use_arrow=use_arrow)
|
|
325
|
+
result = read_dataframe(fpath, use_arrow=use_arrow)
|
|
326
|
+
# GDAL tz only encodes offsets, not timezones
|
|
327
|
+
# check multiple offsets are read as utc datetime instead of string values
|
|
328
|
+
assert_series_equal(result["dates"], utc_col)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
@pytest.mark.filterwarnings(
|
|
332
|
+
"ignore: Non-conformant content for record 1 in column dates"
|
|
333
|
+
)
|
|
334
|
+
@pytest.mark.requires_arrow_write_api
|
|
335
|
+
def test_read_write_datetime_tz_with_nulls(tmp_path, use_arrow):
|
|
336
|
+
dates_raw = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00", pd.NaT]
|
|
337
|
+
if PANDAS_GE_20:
|
|
338
|
+
dates = pd.to_datetime(dates_raw, format="ISO8601").as_unit("ms")
|
|
339
|
+
else:
|
|
340
|
+
dates = pd.to_datetime(dates_raw)
|
|
341
|
+
df = gp.GeoDataFrame(
|
|
342
|
+
{"dates": dates, "geometry": [Point(1, 1), Point(1, 1), Point(1, 1)]},
|
|
343
|
+
crs="EPSG:4326",
|
|
344
|
+
)
|
|
345
|
+
fpath = tmp_path / "test.gpkg"
|
|
346
|
+
write_dataframe(df, fpath, use_arrow=use_arrow)
|
|
347
|
+
result = read_dataframe(fpath, use_arrow=use_arrow)
|
|
348
|
+
if use_arrow:
|
|
349
|
+
# with Arrow, the datetimes are always read as UTC
|
|
350
|
+
df["dates"] = df["dates"].dt.tz_convert("UTC")
|
|
351
|
+
assert_geodataframe_equal(df, result)
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def test_read_null_values(tmp_path, use_arrow):
|
|
355
|
+
filename = tmp_path / "test_null_values_no_geometry.gpkg"
|
|
356
|
+
|
|
357
|
+
# create a GPKG with no geometries and only null values
|
|
358
|
+
expected = pd.DataFrame({"col": [None, None]})
|
|
359
|
+
write_dataframe(expected, filename)
|
|
360
|
+
|
|
361
|
+
df = read_dataframe(filename, use_arrow=use_arrow, read_geometry=False)
|
|
362
|
+
|
|
363
|
+
# make sure that Null values are preserved
|
|
364
|
+
assert np.array_equal(df.col.values, expected.col.values)
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow):
|
|
368
|
+
kwargs = {"use_arrow": use_arrow, "skip_features": 2, "max_features": 2}
|
|
369
|
+
|
|
370
|
+
# default is to not set FIDs as index
|
|
371
|
+
df = read_dataframe(naturalearth_lowres_all_ext, **kwargs)
|
|
372
|
+
assert_index_equal(df.index, pd.RangeIndex(0, 2))
|
|
373
|
+
|
|
374
|
+
df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=False, **kwargs)
|
|
375
|
+
assert_index_equal(df.index, pd.RangeIndex(0, 2))
|
|
376
|
+
|
|
377
|
+
df = read_dataframe(
|
|
378
|
+
naturalearth_lowres_all_ext,
|
|
379
|
+
fid_as_index=True,
|
|
380
|
+
**kwargs,
|
|
381
|
+
)
|
|
382
|
+
fids_expected = pd.Index([2, 3], name="fid")
|
|
383
|
+
fids_expected += START_FID[naturalearth_lowres_all_ext.suffix]
|
|
384
|
+
assert_index_equal(df.index, fids_expected)
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def test_read_fid_as_index_only(naturalearth_lowres, use_arrow):
|
|
388
|
+
df = read_dataframe(
|
|
389
|
+
naturalearth_lowres,
|
|
390
|
+
columns=[],
|
|
391
|
+
read_geometry=False,
|
|
392
|
+
fid_as_index=True,
|
|
393
|
+
use_arrow=use_arrow,
|
|
394
|
+
)
|
|
395
|
+
assert df is not None
|
|
396
|
+
assert len(df) == 177
|
|
397
|
+
assert len(df.columns) == 0
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def test_read_where(naturalearth_lowres_all_ext, use_arrow):
|
|
401
|
+
# empty filter should return full set of records
|
|
402
|
+
df = read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, where="")
|
|
403
|
+
assert len(df) == 177
|
|
404
|
+
|
|
405
|
+
# should return singular item
|
|
406
|
+
df = read_dataframe(
|
|
407
|
+
naturalearth_lowres_all_ext, use_arrow=use_arrow, where="iso_a3 = 'CAN'"
|
|
408
|
+
)
|
|
409
|
+
assert len(df) == 1
|
|
410
|
+
assert df.iloc[0].iso_a3 == "CAN"
|
|
411
|
+
|
|
412
|
+
df = read_dataframe(
|
|
413
|
+
naturalearth_lowres_all_ext,
|
|
414
|
+
use_arrow=use_arrow,
|
|
415
|
+
where="iso_a3 IN ('CAN', 'USA', 'MEX')",
|
|
416
|
+
)
|
|
417
|
+
assert len(df) == 3
|
|
418
|
+
assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0
|
|
419
|
+
|
|
420
|
+
# should return items within range
|
|
421
|
+
df = read_dataframe(
|
|
422
|
+
naturalearth_lowres_all_ext,
|
|
423
|
+
use_arrow=use_arrow,
|
|
424
|
+
where="POP_EST >= 10000000 AND POP_EST < 100000000",
|
|
425
|
+
)
|
|
426
|
+
assert len(df) == 75
|
|
427
|
+
assert df.pop_est.min() >= 10000000
|
|
428
|
+
assert df.pop_est.max() < 100000000
|
|
429
|
+
|
|
430
|
+
# should match no items
|
|
431
|
+
df = read_dataframe(
|
|
432
|
+
naturalearth_lowres_all_ext, use_arrow=use_arrow, where="ISO_A3 = 'INVALID'"
|
|
433
|
+
)
|
|
434
|
+
assert len(df) == 0
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
|
|
438
|
+
if use_arrow and naturalearth_lowres_all_ext.suffix == ".gpkg":
|
|
439
|
+
# https://github.com/OSGeo/gdal/issues/8492
|
|
440
|
+
request.node.add_marker(pytest.mark.xfail(reason="GDAL doesn't error for GPGK"))
|
|
441
|
+
with pytest.raises(ValueError, match="Invalid SQL"):
|
|
442
|
+
read_dataframe(
|
|
443
|
+
naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
|
|
444
|
+
)
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
|
|
448
|
+
# column included in where is not also included in list of columns, which means
|
|
449
|
+
# GDAL will return no features
|
|
450
|
+
# NOTE: this behavior is inconsistent across drivers so only shapefiles are
|
|
451
|
+
# tested for this
|
|
452
|
+
df = read_dataframe(
|
|
453
|
+
naturalearth_lowres,
|
|
454
|
+
where=""" "iso_a3" = 'CAN' """,
|
|
455
|
+
columns=["name"],
|
|
456
|
+
use_arrow=use_arrow,
|
|
457
|
+
)
|
|
458
|
+
|
|
459
|
+
assert len(df) == 0
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
@pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
|
|
463
|
+
def test_read_bbox_invalid(naturalearth_lowres_all_ext, bbox, use_arrow):
|
|
464
|
+
with pytest.raises(ValueError, match="Invalid bbox"):
|
|
465
|
+
read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, bbox=bbox)
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
@pytest.mark.parametrize(
|
|
469
|
+
"bbox,expected",
|
|
470
|
+
[
|
|
471
|
+
((0, 0, 0.00001, 0.00001), []),
|
|
472
|
+
((-85, 8, -80, 10), ["PAN", "CRI"]),
|
|
473
|
+
((-104, 54, -105, 55), ["CAN"]),
|
|
474
|
+
],
|
|
475
|
+
)
|
|
476
|
+
def test_read_bbox(naturalearth_lowres_all_ext, use_arrow, bbox, expected):
|
|
477
|
+
if (
|
|
478
|
+
use_arrow
|
|
479
|
+
and __gdal_version__ < (3, 8, 0)
|
|
480
|
+
and naturalearth_lowres_all_ext.suffix == ".gpkg"
|
|
481
|
+
):
|
|
482
|
+
pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")
|
|
483
|
+
|
|
484
|
+
df = read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, bbox=bbox)
|
|
485
|
+
|
|
486
|
+
assert np.array_equal(df.iso_a3, expected)
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def test_read_bbox_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
490
|
+
df = read_dataframe(
|
|
491
|
+
naturalearth_lowres_all_ext,
|
|
492
|
+
use_arrow=use_arrow,
|
|
493
|
+
bbox=(-180, 50, -100, 90),
|
|
494
|
+
sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
|
|
495
|
+
)
|
|
496
|
+
assert len(df) == 1
|
|
497
|
+
assert np.array_equal(df.iso_a3, ["CAN"])
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def test_read_bbox_where(naturalearth_lowres_all_ext, use_arrow):
|
|
501
|
+
df = read_dataframe(
|
|
502
|
+
naturalearth_lowres_all_ext,
|
|
503
|
+
use_arrow=use_arrow,
|
|
504
|
+
bbox=(-180, 50, -100, 90),
|
|
505
|
+
where="iso_a3 not in ('USA', 'RUS')",
|
|
506
|
+
)
|
|
507
|
+
assert len(df) == 1
|
|
508
|
+
assert np.array_equal(df.iso_a3, ["CAN"])
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
@pytest.mark.parametrize(
|
|
512
|
+
"mask",
|
|
513
|
+
[
|
|
514
|
+
{"type": "Point", "coordinates": [0, 0]},
|
|
515
|
+
'{"type": "Point", "coordinates": [0, 0]}',
|
|
516
|
+
"invalid",
|
|
517
|
+
],
|
|
518
|
+
)
|
|
519
|
+
def test_read_mask_invalid(naturalearth_lowres, use_arrow, mask):
|
|
520
|
+
with pytest.raises(ValueError, match="'mask' parameter must be a Shapely geometry"):
|
|
521
|
+
read_dataframe(naturalearth_lowres, use_arrow=use_arrow, mask=mask)
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def test_read_bbox_mask_invalid(naturalearth_lowres, use_arrow):
|
|
525
|
+
with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
|
|
526
|
+
read_dataframe(
|
|
527
|
+
naturalearth_lowres,
|
|
528
|
+
use_arrow=use_arrow,
|
|
529
|
+
bbox=(-85, 8, -80, 10),
|
|
530
|
+
mask=shapely.Point(-105, 55),
|
|
531
|
+
)
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
@pytest.mark.parametrize(
|
|
535
|
+
"mask,expected",
|
|
536
|
+
[
|
|
537
|
+
(shapely.Point(-105, 55), ["CAN"]),
|
|
538
|
+
(shapely.box(-85, 8, -80, 10), ["PAN", "CRI"]),
|
|
539
|
+
(
|
|
540
|
+
shapely.Polygon(
|
|
541
|
+
(
|
|
542
|
+
[6.101929483362767, 50.97085041206964],
|
|
543
|
+
[5.773001596839322, 50.90661120482673],
|
|
544
|
+
[5.593156133704326, 50.642648747710325],
|
|
545
|
+
[6.059271089606312, 50.686051894002475],
|
|
546
|
+
[6.374064065737485, 50.851481340346965],
|
|
547
|
+
[6.101929483362767, 50.97085041206964],
|
|
548
|
+
)
|
|
549
|
+
),
|
|
550
|
+
["DEU", "BEL", "NLD"],
|
|
551
|
+
),
|
|
552
|
+
(
|
|
553
|
+
shapely.GeometryCollection(
|
|
554
|
+
[shapely.Point(-7.7, 53), shapely.box(-85, 8, -80, 10)]
|
|
555
|
+
),
|
|
556
|
+
["PAN", "CRI", "IRL"],
|
|
557
|
+
),
|
|
558
|
+
],
|
|
559
|
+
)
|
|
560
|
+
def test_read_mask(
|
|
561
|
+
naturalearth_lowres_all_ext,
|
|
562
|
+
use_arrow,
|
|
563
|
+
mask,
|
|
564
|
+
expected,
|
|
565
|
+
):
|
|
566
|
+
if (
|
|
567
|
+
use_arrow
|
|
568
|
+
and __gdal_version__ < (3, 8, 0)
|
|
569
|
+
and naturalearth_lowres_all_ext.suffix == ".gpkg"
|
|
570
|
+
):
|
|
571
|
+
pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")
|
|
572
|
+
|
|
573
|
+
df = read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, mask=mask)
|
|
574
|
+
|
|
575
|
+
assert len(df) == len(expected)
|
|
576
|
+
assert np.array_equal(df.iso_a3, expected)
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
def test_read_mask_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
580
|
+
df = read_dataframe(
|
|
581
|
+
naturalearth_lowres_all_ext,
|
|
582
|
+
use_arrow=use_arrow,
|
|
583
|
+
mask=shapely.box(-180, 50, -100, 90),
|
|
584
|
+
sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
|
|
585
|
+
)
|
|
586
|
+
assert len(df) == 1
|
|
587
|
+
assert np.array_equal(df.iso_a3, ["CAN"])
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
def test_read_mask_where(naturalearth_lowres_all_ext, use_arrow):
|
|
591
|
+
df = read_dataframe(
|
|
592
|
+
naturalearth_lowres_all_ext,
|
|
593
|
+
use_arrow=use_arrow,
|
|
594
|
+
mask=shapely.box(-180, 50, -100, 90),
|
|
595
|
+
where="iso_a3 not in ('USA', 'RUS')",
|
|
596
|
+
)
|
|
597
|
+
assert len(df) == 1
|
|
598
|
+
assert np.array_equal(df.iso_a3, ["CAN"])
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
@pytest.mark.parametrize("fids", [[1, 5, 10], np.array([1, 5, 10], dtype=np.int64)])
|
|
602
|
+
def test_read_fids(naturalearth_lowres_all_ext, fids, use_arrow):
|
|
603
|
+
# ensure keyword is properly passed through
|
|
604
|
+
df = read_dataframe(
|
|
605
|
+
naturalearth_lowres_all_ext, fids=fids, fid_as_index=True, use_arrow=use_arrow
|
|
606
|
+
)
|
|
607
|
+
assert len(df) == 3
|
|
608
|
+
assert np.array_equal(fids, df.index.values)
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
@requires_pyarrow_api
|
|
612
|
+
def test_read_fids_arrow_max_exception(naturalearth_lowres):
|
|
613
|
+
# Maximum number at time of writing is 4997 for "OGRSQL". For e.g. for SQLite based
|
|
614
|
+
# formats like Geopackage, there is no limit.
|
|
615
|
+
nb_fids = 4998
|
|
616
|
+
fids = range(nb_fids)
|
|
617
|
+
with pytest.raises(ValueError, match=f"error applying filter for {nb_fids} fids"):
|
|
618
|
+
_ = read_dataframe(naturalearth_lowres, fids=fids, use_arrow=True)
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
@requires_pyarrow_api
|
|
622
|
+
@pytest.mark.skipif(
|
|
623
|
+
__gdal_version__ >= (3, 8, 0), reason="GDAL >= 3.8.0 does not need to warn"
|
|
624
|
+
)
|
|
625
|
+
def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext):
|
|
626
|
+
# A warning should be given for old GDAL versions, except for some file formats.
|
|
627
|
+
if naturalearth_lowres_all_ext.suffix not in [".gpkg", ".geojson"]:
|
|
628
|
+
handler = pytest.warns(
|
|
629
|
+
UserWarning,
|
|
630
|
+
match="Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow",
|
|
631
|
+
)
|
|
632
|
+
else:
|
|
633
|
+
handler = contextlib.nullcontext()
|
|
634
|
+
|
|
635
|
+
with handler:
|
|
636
|
+
df = read_dataframe(naturalearth_lowres_all_ext, fids=[22], use_arrow=True)
|
|
637
|
+
assert len(df) == 1
|
|
638
|
+
|
|
639
|
+
|
|
640
|
+
def test_read_fids_force_2d(tmp_path):
|
|
641
|
+
filename = tmp_path / "test.gpkg"
|
|
642
|
+
|
|
643
|
+
# create a GPKG with 3D point values
|
|
644
|
+
expected = gp.GeoDataFrame(
|
|
645
|
+
geometry=[Point(0, 0, 0), Point(1, 1, 0)], crs="EPSG:4326"
|
|
646
|
+
)
|
|
647
|
+
write_dataframe(expected, filename)
|
|
648
|
+
|
|
649
|
+
df = read_dataframe(filename, fids=[1])
|
|
650
|
+
assert_geodataframe_equal(df, expected.iloc[:1])
|
|
651
|
+
|
|
652
|
+
df = read_dataframe(filename, force_2d=True, fids=[1])
|
|
653
|
+
assert np.array_equal(
|
|
654
|
+
df.geometry.values, shapely.force_2d(expected.iloc[:1].geometry.values)
|
|
655
|
+
)
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
@pytest.mark.parametrize("skip_features", [10, 200])
|
|
659
|
+
def test_read_skip_features(naturalearth_lowres_all_ext, use_arrow, skip_features):
|
|
660
|
+
ext = naturalearth_lowres_all_ext.suffix
|
|
661
|
+
expected = (
|
|
662
|
+
read_dataframe(naturalearth_lowres_all_ext)
|
|
663
|
+
.iloc[skip_features:]
|
|
664
|
+
.reset_index(drop=True)
|
|
665
|
+
)
|
|
666
|
+
|
|
667
|
+
df = read_dataframe(
|
|
668
|
+
naturalearth_lowres_all_ext, skip_features=skip_features, use_arrow=use_arrow
|
|
669
|
+
)
|
|
670
|
+
assert len(df) == len(expected)
|
|
671
|
+
|
|
672
|
+
# Coordinates are not precisely equal when written to JSON
|
|
673
|
+
# dtypes do not necessarily round-trip precisely through JSON
|
|
674
|
+
is_json = ext in [".geojson", ".geojsonl"]
|
|
675
|
+
# In .geojsonl the vertices are reordered, so normalize
|
|
676
|
+
is_jsons = ext == ".geojsonl"
|
|
677
|
+
|
|
678
|
+
assert_geodataframe_equal(
|
|
679
|
+
df,
|
|
680
|
+
expected,
|
|
681
|
+
check_less_precise=is_json,
|
|
682
|
+
check_index_type=False,
|
|
683
|
+
check_dtype=not is_json,
|
|
684
|
+
normalize=is_jsons,
|
|
685
|
+
)
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
def test_read_negative_skip_features(naturalearth_lowres, use_arrow):
|
|
689
|
+
with pytest.raises(ValueError, match="'skip_features' must be >= 0"):
|
|
690
|
+
read_dataframe(naturalearth_lowres, skip_features=-1, use_arrow=use_arrow)
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
@pytest.mark.parametrize("max_features", [10, 100])
|
|
694
|
+
def test_read_max_features(naturalearth_lowres_all_ext, use_arrow, max_features):
|
|
695
|
+
ext = naturalearth_lowres_all_ext.suffix
|
|
696
|
+
expected = read_dataframe(naturalearth_lowres_all_ext).iloc[:max_features]
|
|
697
|
+
df = read_dataframe(
|
|
698
|
+
naturalearth_lowres_all_ext, max_features=max_features, use_arrow=use_arrow
|
|
699
|
+
)
|
|
700
|
+
|
|
701
|
+
assert len(df) == len(expected)
|
|
702
|
+
|
|
703
|
+
# Coordinates are not precisely equal when written to JSON
|
|
704
|
+
# dtypes do not necessarily round-trip precisely through JSON
|
|
705
|
+
is_json = ext in [".geojson", ".geojsonl"]
|
|
706
|
+
# In .geojsonl the vertices are reordered, so normalize
|
|
707
|
+
is_jsons = ext == ".geojsonl"
|
|
708
|
+
|
|
709
|
+
assert_geodataframe_equal(
|
|
710
|
+
df,
|
|
711
|
+
expected,
|
|
712
|
+
check_less_precise=is_json,
|
|
713
|
+
check_index_type=False,
|
|
714
|
+
check_dtype=not is_json,
|
|
715
|
+
normalize=is_jsons,
|
|
716
|
+
)
|
|
717
|
+
|
|
718
|
+
|
|
719
|
+
def test_read_negative_max_features(naturalearth_lowres, use_arrow):
|
|
720
|
+
with pytest.raises(ValueError, match="'max_features' must be >= 0"):
|
|
721
|
+
read_dataframe(naturalearth_lowres, max_features=-1, use_arrow=use_arrow)
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
def test_read_non_existent_file(use_arrow):
|
|
725
|
+
# ensure consistent error type / message from GDAL
|
|
726
|
+
with pytest.raises(DataSourceError, match="No such file or directory"):
|
|
727
|
+
read_dataframe("non-existent.shp", use_arrow=use_arrow)
|
|
728
|
+
|
|
729
|
+
with pytest.raises(DataSourceError, match="does not exist in the file system"):
|
|
730
|
+
read_dataframe("/vsizip/non-existent.zip", use_arrow=use_arrow)
|
|
731
|
+
|
|
732
|
+
with pytest.raises(DataSourceError, match="does not exist in the file system"):
|
|
733
|
+
read_dataframe("zip:///non-existent.zip", use_arrow=use_arrow)
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
737
|
+
# The geometry column cannot be specified when using the
|
|
738
|
+
# default OGRSQL dialect but is returned nonetheless, so 4 columns.
|
|
739
|
+
sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
|
|
740
|
+
df = read_dataframe(
|
|
741
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
742
|
+
)
|
|
743
|
+
assert len(df.columns) == 4
|
|
744
|
+
assert len(df) == 177
|
|
745
|
+
|
|
746
|
+
# Should return single row
|
|
747
|
+
sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
|
|
748
|
+
df = read_dataframe(
|
|
749
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
750
|
+
)
|
|
751
|
+
assert len(df) == 1
|
|
752
|
+
assert len(df.columns) == 6
|
|
753
|
+
assert df.iloc[0].iso_a3 == "CAN"
|
|
754
|
+
|
|
755
|
+
sql = """SELECT *
|
|
756
|
+
FROM naturalearth_lowres
|
|
757
|
+
WHERE iso_a3 IN ('CAN', 'USA', 'MEX')"""
|
|
758
|
+
df = read_dataframe(
|
|
759
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
760
|
+
)
|
|
761
|
+
assert len(df.columns) == 6
|
|
762
|
+
assert len(df) == 3
|
|
763
|
+
assert df.iso_a3.tolist() == ["CAN", "USA", "MEX"]
|
|
764
|
+
|
|
765
|
+
sql = """SELECT *
|
|
766
|
+
FROM naturalearth_lowres
|
|
767
|
+
WHERE iso_a3 IN ('CAN', 'USA', 'MEX')
|
|
768
|
+
ORDER BY name"""
|
|
769
|
+
df = read_dataframe(
|
|
770
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
771
|
+
)
|
|
772
|
+
assert len(df.columns) == 6
|
|
773
|
+
assert len(df) == 3
|
|
774
|
+
assert df.iso_a3.tolist() == ["CAN", "MEX", "USA"]
|
|
775
|
+
|
|
776
|
+
# Should return items within range.
|
|
777
|
+
sql = """SELECT *
|
|
778
|
+
FROM naturalearth_lowres
|
|
779
|
+
WHERE POP_EST >= 10000000 AND POP_EST < 100000000"""
|
|
780
|
+
df = read_dataframe(
|
|
781
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
782
|
+
)
|
|
783
|
+
assert len(df) == 75
|
|
784
|
+
assert len(df.columns) == 6
|
|
785
|
+
assert df.pop_est.min() >= 10000000
|
|
786
|
+
assert df.pop_est.max() < 100000000
|
|
787
|
+
|
|
788
|
+
# Should match no items.
|
|
789
|
+
sql = "SELECT * FROM naturalearth_lowres WHERE ISO_A3 = 'INVALID'"
|
|
790
|
+
df = read_dataframe(
|
|
791
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
792
|
+
)
|
|
793
|
+
assert len(df) == 0
|
|
794
|
+
|
|
795
|
+
|
|
796
|
+
def test_read_sql_invalid(naturalearth_lowres_all_ext, use_arrow):
|
|
797
|
+
if naturalearth_lowres_all_ext.suffix == ".gpkg":
|
|
798
|
+
with pytest.raises(Exception, match="In ExecuteSQL().*"):
|
|
799
|
+
read_dataframe(
|
|
800
|
+
naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
|
|
801
|
+
)
|
|
802
|
+
else:
|
|
803
|
+
with pytest.raises(Exception, match="SQL Expression Parsing Error"):
|
|
804
|
+
read_dataframe(
|
|
805
|
+
naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
|
|
806
|
+
)
|
|
807
|
+
|
|
808
|
+
with pytest.raises(
|
|
809
|
+
ValueError, match="'sql' parameter cannot be combined with 'layer'"
|
|
810
|
+
):
|
|
811
|
+
read_dataframe(
|
|
812
|
+
naturalearth_lowres_all_ext,
|
|
813
|
+
sql="whatever",
|
|
814
|
+
layer="invalid",
|
|
815
|
+
use_arrow=use_arrow,
|
|
816
|
+
)
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
def test_read_sql_columns_where(naturalearth_lowres_all_ext, use_arrow):
|
|
820
|
+
sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
|
|
821
|
+
df = read_dataframe(
|
|
822
|
+
naturalearth_lowres_all_ext,
|
|
823
|
+
sql=sql,
|
|
824
|
+
sql_dialect="OGRSQL",
|
|
825
|
+
columns=["iso_a3_renamed", "name"],
|
|
826
|
+
where="iso_a3_renamed IN ('CAN', 'USA', 'MEX')",
|
|
827
|
+
use_arrow=use_arrow,
|
|
828
|
+
)
|
|
829
|
+
assert len(df.columns) == 3
|
|
830
|
+
assert len(df) == 3
|
|
831
|
+
assert df.iso_a3_renamed.tolist() == ["CAN", "USA", "MEX"]
|
|
832
|
+
|
|
833
|
+
|
|
834
|
+
def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext, use_arrow):
|
|
835
|
+
sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
|
|
836
|
+
df = read_dataframe(
|
|
837
|
+
naturalearth_lowres_all_ext,
|
|
838
|
+
sql=sql,
|
|
839
|
+
sql_dialect="OGRSQL",
|
|
840
|
+
columns=["iso_a3_renamed", "name"],
|
|
841
|
+
where="iso_a3_renamed IN ('CRI', 'PAN')",
|
|
842
|
+
bbox=(-85, 8, -80, 10),
|
|
843
|
+
use_arrow=use_arrow,
|
|
844
|
+
)
|
|
845
|
+
assert len(df.columns) == 3
|
|
846
|
+
assert len(df) == 2
|
|
847
|
+
assert df.iso_a3_renamed.tolist() == ["PAN", "CRI"]
|
|
848
|
+
|
|
849
|
+
|
|
850
|
+
def test_read_sql_skip_max(naturalearth_lowres_all_ext, use_arrow):
|
|
851
|
+
sql = """SELECT *
|
|
852
|
+
FROM naturalearth_lowres
|
|
853
|
+
WHERE iso_a3 IN ('CAN', 'MEX', 'USA')
|
|
854
|
+
ORDER BY name"""
|
|
855
|
+
df = read_dataframe(
|
|
856
|
+
naturalearth_lowres_all_ext,
|
|
857
|
+
sql=sql,
|
|
858
|
+
skip_features=1,
|
|
859
|
+
max_features=1,
|
|
860
|
+
sql_dialect="OGRSQL",
|
|
861
|
+
use_arrow=use_arrow,
|
|
862
|
+
)
|
|
863
|
+
assert len(df.columns) == 6
|
|
864
|
+
assert len(df) == 1
|
|
865
|
+
assert df.iso_a3.tolist() == ["MEX"]
|
|
866
|
+
|
|
867
|
+
sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
|
|
868
|
+
df = read_dataframe(
|
|
869
|
+
naturalearth_lowres_all_ext,
|
|
870
|
+
sql=sql,
|
|
871
|
+
max_features=3,
|
|
872
|
+
sql_dialect="OGRSQL",
|
|
873
|
+
use_arrow=use_arrow,
|
|
874
|
+
)
|
|
875
|
+
assert len(df) == 1
|
|
876
|
+
|
|
877
|
+
sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
|
|
878
|
+
df = read_dataframe(
|
|
879
|
+
naturalearth_lowres_all_ext,
|
|
880
|
+
sql=sql,
|
|
881
|
+
sql_dialect="OGRSQL",
|
|
882
|
+
skip_features=1,
|
|
883
|
+
use_arrow=use_arrow,
|
|
884
|
+
)
|
|
885
|
+
assert len(df) == 0
|
|
886
|
+
|
|
887
|
+
|
|
888
|
+
@requires_gdal_geos
|
|
889
|
+
@pytest.mark.parametrize(
|
|
890
|
+
"naturalearth_lowres",
|
|
891
|
+
[ext for ext in ALL_EXTS if ext != ".gpkg"],
|
|
892
|
+
indirect=["naturalearth_lowres"],
|
|
893
|
+
)
|
|
894
|
+
def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres, use_arrow):
|
|
895
|
+
# Should return singular item
|
|
896
|
+
sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
|
|
897
|
+
df = read_dataframe(
|
|
898
|
+
naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
|
|
899
|
+
)
|
|
900
|
+
assert len(df) == 1
|
|
901
|
+
assert len(df.columns) == 6
|
|
902
|
+
assert df.iloc[0].iso_a3 == "CAN"
|
|
903
|
+
area_canada = df.iloc[0].geometry.area
|
|
904
|
+
|
|
905
|
+
# Use spatialite function
|
|
906
|
+
sql = """SELECT ST_Buffer(geometry, 5) AS geometry, name, pop_est, iso_a3
|
|
907
|
+
FROM naturalearth_lowres
|
|
908
|
+
WHERE ISO_A3 = 'CAN'"""
|
|
909
|
+
df = read_dataframe(
|
|
910
|
+
naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
|
|
911
|
+
)
|
|
912
|
+
assert len(df) == 1
|
|
913
|
+
assert len(df.columns) == 4
|
|
914
|
+
assert df.iloc[0].geometry.area > area_canada
|
|
915
|
+
|
|
916
|
+
|
|
917
|
+
@requires_gdal_geos
|
|
918
|
+
@pytest.mark.parametrize(
|
|
919
|
+
"naturalearth_lowres", [".gpkg"], indirect=["naturalearth_lowres"]
|
|
920
|
+
)
|
|
921
|
+
def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
|
|
922
|
+
# "INDIRECT_SQL" prohibits GDAL from passing the SQL statement to sqlite.
|
|
923
|
+
# Because the statement is processed within GDAL it is possible to use
|
|
924
|
+
# spatialite functions even if sqlite isn't built with spatialite support.
|
|
925
|
+
sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
|
|
926
|
+
df = read_dataframe(
|
|
927
|
+
naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
|
|
928
|
+
)
|
|
929
|
+
assert len(df) == 1
|
|
930
|
+
assert len(df.columns) == 6
|
|
931
|
+
assert df.iloc[0].iso_a3 == "CAN"
|
|
932
|
+
area_canada = df.iloc[0].geometry.area
|
|
933
|
+
|
|
934
|
+
# Use spatialite function
|
|
935
|
+
sql = """SELECT ST_Buffer(geom, 5) AS geometry, name, pop_est, iso_a3
|
|
936
|
+
FROM naturalearth_lowres
|
|
937
|
+
WHERE ISO_A3 = 'CAN'"""
|
|
938
|
+
df = read_dataframe(
|
|
939
|
+
naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
|
|
940
|
+
)
|
|
941
|
+
assert len(df) == 1
|
|
942
|
+
assert len(df.columns) == 4
|
|
943
|
+
assert df.iloc[0].geometry.area > area_canada
|
|
944
|
+
|
|
945
|
+
|
|
946
|
+
@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
|
|
947
|
+
def test_write_csv_encoding(tmp_path, encoding):
|
|
948
|
+
"""Test if write_dataframe uses the default encoding correctly."""
|
|
949
|
+
# Write csv test file. Depending on the os this will be written in a different
|
|
950
|
+
# encoding: for linux and macos this is utf-8, for windows it is cp1252.
|
|
951
|
+
csv_path = tmp_path / "test.csv"
|
|
952
|
+
|
|
953
|
+
with open(csv_path, "w", encoding=encoding) as csv:
|
|
954
|
+
csv.write("näme,city\n")
|
|
955
|
+
csv.write("Wilhelm Röntgen,Zürich\n")
|
|
956
|
+
|
|
957
|
+
# Write csv test file with the same data using write_dataframe. It should use the
|
|
958
|
+
# same encoding as above.
|
|
959
|
+
df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
|
|
960
|
+
csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
|
|
961
|
+
write_dataframe(df, csv_pyogrio_path, encoding=encoding)
|
|
962
|
+
|
|
963
|
+
# Check if the text files written both ways can be read again and give same result.
|
|
964
|
+
with open(csv_path, encoding=encoding) as csv:
|
|
965
|
+
csv_str = csv.read()
|
|
966
|
+
with open(csv_pyogrio_path, encoding=encoding) as csv_pyogrio:
|
|
967
|
+
csv_pyogrio_str = csv_pyogrio.read()
|
|
968
|
+
assert csv_str == csv_pyogrio_str
|
|
969
|
+
|
|
970
|
+
# Check if they files are binary identical, to be 100% sure they were written with
|
|
971
|
+
# the same encoding.
|
|
972
|
+
with open(csv_path, "rb") as csv:
|
|
973
|
+
csv_bytes = csv.read()
|
|
974
|
+
with open(csv_pyogrio_path, "rb") as csv_pyogrio:
|
|
975
|
+
csv_pyogrio_bytes = csv_pyogrio.read()
|
|
976
|
+
assert csv_bytes == csv_pyogrio_bytes
|
|
977
|
+
|
|
978
|
+
|
|
979
|
+
@pytest.mark.parametrize("ext", ALL_EXTS)
|
|
980
|
+
@pytest.mark.requires_arrow_write_api
|
|
981
|
+
def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
|
|
982
|
+
input_gdf = read_dataframe(naturalearth_lowres)
|
|
983
|
+
output_path = tmp_path / f"test{ext}"
|
|
984
|
+
|
|
985
|
+
if ext == ".fgb":
|
|
986
|
+
# For .fgb, spatial_index=False to avoid the rows being reordered
|
|
987
|
+
write_dataframe(
|
|
988
|
+
input_gdf, output_path, use_arrow=use_arrow, spatial_index=False
|
|
989
|
+
)
|
|
990
|
+
else:
|
|
991
|
+
write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
|
|
992
|
+
|
|
993
|
+
assert output_path.exists()
|
|
994
|
+
result_gdf = read_dataframe(output_path)
|
|
995
|
+
|
|
996
|
+
geometry_types = result_gdf.geometry.type.unique()
|
|
997
|
+
if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
|
|
998
|
+
assert list(geometry_types) == ["MultiPolygon"]
|
|
999
|
+
else:
|
|
1000
|
+
assert set(geometry_types) == {"MultiPolygon", "Polygon"}
|
|
1001
|
+
|
|
1002
|
+
# Coordinates are not precisely equal when written to JSON
|
|
1003
|
+
# dtypes do not necessarily round-trip precisely through JSON
|
|
1004
|
+
is_json = ext in [".geojson", ".geojsonl"]
|
|
1005
|
+
# In .geojsonl the vertices are reordered, so normalize
|
|
1006
|
+
is_jsons = ext == ".geojsonl"
|
|
1007
|
+
|
|
1008
|
+
assert_geodataframe_equal(
|
|
1009
|
+
result_gdf,
|
|
1010
|
+
input_gdf,
|
|
1011
|
+
check_less_precise=is_json,
|
|
1012
|
+
check_index_type=False,
|
|
1013
|
+
check_dtype=not is_json,
|
|
1014
|
+
normalize=is_jsons,
|
|
1015
|
+
)
|
|
1016
|
+
|
|
1017
|
+
|
|
1018
|
+
@pytest.mark.filterwarnings("ignore:.*No SRS set on layer.*")
|
|
1019
|
+
@pytest.mark.parametrize("write_geodf", [True, False])
|
|
1020
|
+
@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS + [".xlsx"] if ext != ".fgb"])
|
|
1021
|
+
@pytest.mark.requires_arrow_write_api
|
|
1022
|
+
def test_write_dataframe_no_geom(
|
|
1023
|
+
request, tmp_path, naturalearth_lowres, write_geodf, ext, use_arrow
|
|
1024
|
+
):
|
|
1025
|
+
"""Test writing a (geo)dataframe without a geometry column.
|
|
1026
|
+
|
|
1027
|
+
FlatGeobuf (.fgb) doesn't seem to support this, and just writes an empty file.
|
|
1028
|
+
"""
|
|
1029
|
+
# Prepare test data
|
|
1030
|
+
input_df = read_dataframe(naturalearth_lowres, read_geometry=False)
|
|
1031
|
+
if write_geodf:
|
|
1032
|
+
input_df = gp.GeoDataFrame(input_df)
|
|
1033
|
+
|
|
1034
|
+
output_path = tmp_path / f"test{ext}"
|
|
1035
|
+
|
|
1036
|
+
# A shapefile without geometry column results in only a .dbf file.
|
|
1037
|
+
if ext == ".shp":
|
|
1038
|
+
output_path = output_path.with_suffix(".dbf")
|
|
1039
|
+
|
|
1040
|
+
# Determine driver
|
|
1041
|
+
driver = DRIVERS[ext] if ext != ".xlsx" else "XLSX"
|
|
1042
|
+
|
|
1043
|
+
write_dataframe(input_df, output_path, use_arrow=use_arrow, driver=driver)
|
|
1044
|
+
|
|
1045
|
+
assert output_path.exists()
|
|
1046
|
+
result_df = read_dataframe(output_path)
|
|
1047
|
+
|
|
1048
|
+
assert isinstance(result_df, pd.DataFrame)
|
|
1049
|
+
|
|
1050
|
+
# some dtypes do not round-trip precisely through these file types
|
|
1051
|
+
check_dtype = ext not in [".geojson", ".geojsonl", ".xlsx"]
|
|
1052
|
+
|
|
1053
|
+
if ext in [".gpkg", ".shp", ".xlsx"]:
|
|
1054
|
+
# These file types return a DataFrame when read.
|
|
1055
|
+
assert not isinstance(result_df, gp.GeoDataFrame)
|
|
1056
|
+
if isinstance(input_df, gp.GeoDataFrame):
|
|
1057
|
+
input_df = pd.DataFrame(input_df)
|
|
1058
|
+
|
|
1059
|
+
pd.testing.assert_frame_equal(
|
|
1060
|
+
result_df, input_df, check_index_type=False, check_dtype=check_dtype
|
|
1061
|
+
)
|
|
1062
|
+
else:
|
|
1063
|
+
# These file types return a GeoDataFrame with None Geometries when read.
|
|
1064
|
+
input_none_geom_gdf = gp.GeoDataFrame(
|
|
1065
|
+
input_df, geometry=np.repeat(None, len(input_df)), crs=4326
|
|
1066
|
+
)
|
|
1067
|
+
assert_geodataframe_equal(
|
|
1068
|
+
result_df,
|
|
1069
|
+
input_none_geom_gdf,
|
|
1070
|
+
check_index_type=False,
|
|
1071
|
+
check_dtype=check_dtype,
|
|
1072
|
+
)
|
|
1073
|
+
|
|
1074
|
+
|
|
1075
|
+
@pytest.mark.requires_arrow_write_api
def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):
    # dataframe writing ignores the index
    input_gdf = read_dataframe(naturalearth_lowres)
    input_gdf = input_gdf.set_index("iso_a3")

    output_path = tmp_path / "test.shp"
    write_dataframe(input_gdf, output_path, use_arrow=use_arrow)

    result_gdf = read_dataframe(output_path)
    assert isinstance(result_gdf.index, pd.RangeIndex)
    assert_geodataframe_equal(result_gdf, input_gdf.reset_index(drop=True))


@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
@pytest.mark.requires_arrow_write_api
def test_write_empty_dataframe(tmp_path, ext, use_arrow):
    expected = gp.GeoDataFrame(geometry=[], crs=4326)

    filename = tmp_path / f"test{ext}"
    write_dataframe(expected, filename, use_arrow=use_arrow)

    assert filename.exists()
    df = read_dataframe(filename)
    assert_geodataframe_equal(df, expected)


def test_write_empty_geometry(tmp_path):
    expected = gp.GeoDataFrame({"x": [0]}, geometry=from_wkt(["POINT EMPTY"]), crs=4326)
    filename = tmp_path / "test.gpkg"

    # Check that no warning is raised with GeoSeries.notna()
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        if not HAS_PYPROJ:
            warnings.filterwarnings("ignore", message="'crs' was not provided.")
        write_dataframe(expected, filename)
    assert filename.exists()

    # Xref GH-436: round-tripping possible with GPKG but not others
    df = read_dataframe(filename)
    assert_geodataframe_equal(df, expected)


@pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
@pytest.mark.requires_arrow_write_api
def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
    # Writing an empty dataframe to .geojsons or .geojsonl logically results in
    # a 0-byte file, but GDAL is not able to read those again at the time of
    # writing. Issue logged here: https://github.com/geopandas/pyogrio/issues/94
    expected = gp.GeoDataFrame(geometry=[], crs=4326)

    filename = tmp_path / f"test{ext}"
    write_dataframe(expected, filename, use_arrow=use_arrow)

    assert filename.exists()
    with pytest.raises(
        Exception, match=".* not recognized as( being in)? a supported file format."
    ):
        _ = read_dataframe(filename, use_arrow=use_arrow)


@pytest.mark.requires_arrow_write_api
def test_write_dataframe_gpkg_multiple_layers(tmp_path, naturalearth_lowres, use_arrow):
    input_gdf = read_dataframe(naturalearth_lowres)
    filename = tmp_path / "test.gpkg"

    write_dataframe(
        input_gdf,
        filename,
        layer="first",
        promote_to_multi=True,
        use_arrow=use_arrow,
    )

    assert filename.exists()
    assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])

    write_dataframe(
        input_gdf,
        filename,
        layer="second",
        promote_to_multi=True,
        use_arrow=use_arrow,
    )
    assert np.array_equal(
        list_layers(filename),
        [["first", "MultiPolygon"], ["second", "MultiPolygon"]],
    )


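Aside: layers written this way can be read back individually via the layer keyword of read_dataframe. A minimal sketch, assuming a GeoPackage at the hypothetical path "multi_layer.gpkg" written with two layers as in the test above:

    from pyogrio import read_dataframe

    # "multi_layer.gpkg" is a hypothetical path; layer selects which table to read
    gdf_second = read_dataframe("multi_layer.gpkg", layer="second")
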
@pytest.mark.parametrize("ext", ALL_EXTS)
|
|
1167
|
+
@pytest.mark.requires_arrow_write_api
|
|
1168
|
+
def test_write_dataframe_append(request, tmp_path, naturalearth_lowres, ext, use_arrow):
|
|
1169
|
+
if ext == ".fgb" and __gdal_version__ <= (3, 5, 0):
|
|
1170
|
+
pytest.skip("Append to FlatGeobuf fails for GDAL <= 3.5.0")
|
|
1171
|
+
|
|
1172
|
+
if ext in (".geojsonl", ".geojsons") and __gdal_version__ <= (3, 6, 0):
|
|
1173
|
+
pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")
|
|
1174
|
+
|
|
1175
|
+
if use_arrow and ext.startswith(".geojson"):
|
|
1176
|
+
# Bug in GDAL when appending int64 to GeoJSON
|
|
1177
|
+
# (https://github.com/OSGeo/gdal/issues/9792)
|
|
1178
|
+
request.node.add_marker(
|
|
1179
|
+
pytest.mark.xfail(reason="Bugs with append when writing Arrow to GeoJSON")
|
|
1180
|
+
)
|
|
1181
|
+
|
|
1182
|
+
input_gdf = read_dataframe(naturalearth_lowres)
|
|
1183
|
+
filename = tmp_path / f"test{ext}"
|
|
1184
|
+
|
|
1185
|
+
write_dataframe(input_gdf, filename, use_arrow=use_arrow)
|
|
1186
|
+
|
|
1187
|
+
filename.exists()
|
|
1188
|
+
assert len(read_dataframe(filename)) == 177
|
|
1189
|
+
|
|
1190
|
+
write_dataframe(input_gdf, filename, use_arrow=use_arrow, append=True)
|
|
1191
|
+
assert len(read_dataframe(filename)) == 354
|
|
1192
|
+
|
|
1193
|
+
|
|
1194
|
+
@pytest.mark.parametrize("spatial_index", [False, True])
|
|
1195
|
+
@pytest.mark.requires_arrow_write_api
|
|
1196
|
+
def test_write_dataframe_gdal_options(
|
|
1197
|
+
tmp_path, naturalearth_lowres, spatial_index, use_arrow
|
|
1198
|
+
):
|
|
1199
|
+
df = read_dataframe(naturalearth_lowres)
|
|
1200
|
+
|
|
1201
|
+
outfilename1 = tmp_path / "test1.shp"
|
|
1202
|
+
write_dataframe(
|
|
1203
|
+
df,
|
|
1204
|
+
outfilename1,
|
|
1205
|
+
use_arrow=use_arrow,
|
|
1206
|
+
SPATIAL_INDEX="YES" if spatial_index else "NO",
|
|
1207
|
+
)
|
|
1208
|
+
assert outfilename1.exists() is True
|
|
1209
|
+
index_filename1 = tmp_path / "test1.qix"
|
|
1210
|
+
assert index_filename1.exists() is spatial_index
|
|
1211
|
+
|
|
1212
|
+
# using explicit layer_options instead
|
|
1213
|
+
outfilename2 = tmp_path / "test2.shp"
|
|
1214
|
+
write_dataframe(
|
|
1215
|
+
df,
|
|
1216
|
+
outfilename2,
|
|
1217
|
+
use_arrow=use_arrow,
|
|
1218
|
+
layer_options={"spatial_index": spatial_index},
|
|
1219
|
+
)
|
|
1220
|
+
assert outfilename2.exists() is True
|
|
1221
|
+
index_filename2 = tmp_path / "test2.qix"
|
|
1222
|
+
assert index_filename2.exists() is spatial_index
|
|
1223
|
+
|
|
1224
|
+
|
|
1225
|
+
@pytest.mark.requires_arrow_write_api
|
|
1226
|
+
def test_write_dataframe_gdal_options_unknown(tmp_path, naturalearth_lowres, use_arrow):
|
|
1227
|
+
df = read_dataframe(naturalearth_lowres)
|
|
1228
|
+
|
|
1229
|
+
# geojson has no spatial index, so passing keyword should raise
|
|
1230
|
+
outfilename = tmp_path / "test.geojson"
|
|
1231
|
+
with pytest.raises(ValueError, match="unrecognized option 'SPATIAL_INDEX'"):
|
|
1232
|
+
write_dataframe(df, outfilename, use_arrow=use_arrow, spatial_index=True)
|
|
1233
|
+
|
|
1234
|
+
|
|
1235
|
+
def _get_gpkg_table_names(path):
    import sqlite3

    con = sqlite3.connect(path)
    cursor = con.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    result = cursor.fetchall()
    return [res[0] for res in result]


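Aside: a minimal usage sketch for the helper above, assuming a GeoPackage at the hypothetical path "example.gpkg"; a file written with default options should list the gpkg_ogr_contents table that the next test checks for:

    tables = _get_gpkg_table_names("example.gpkg")  # hypothetical file
    print("gpkg_ogr_contents" in tables)
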
@pytest.mark.requires_arrow_write_api
def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres, use_arrow):
    df = read_dataframe(naturalearth_lowres)

    test_default_filename = tmp_path / "test_default.gpkg"
    write_dataframe(df, test_default_filename, use_arrow=use_arrow)
    assert "gpkg_ogr_contents" in _get_gpkg_table_names(test_default_filename)

    test_no_contents_filename = tmp_path / "test_no_contents.gpkg"
    write_dataframe(
        df, test_no_contents_filename, use_arrow=use_arrow, ADD_GPKG_OGR_CONTENTS="NO"
    )
    assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename)

    test_no_contents_filename2 = tmp_path / "test_no_contents2.gpkg"
    write_dataframe(
        df,
        test_no_contents_filename2,
        use_arrow=use_arrow,
        dataset_options={"add_gpkg_ogr_contents": False},
    )
    assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename2)


@pytest.mark.parametrize(
    "ext, promote_to_multi, expected_geometry_types, expected_geometry_type",
    [
        (".fgb", None, ["MultiPolygon"], "MultiPolygon"),
        (".fgb", True, ["MultiPolygon"], "MultiPolygon"),
        (".fgb", False, ["MultiPolygon", "Polygon"], "Unknown"),
        (".geojson", None, ["MultiPolygon", "Polygon"], "Unknown"),
        (".geojson", True, ["MultiPolygon"], "MultiPolygon"),
        (".geojson", False, ["MultiPolygon", "Polygon"], "Unknown"),
    ],
)
@pytest.mark.requires_arrow_write_api
def test_write_dataframe_promote_to_multi(
    tmp_path,
    naturalearth_lowres,
    ext,
    promote_to_multi,
    expected_geometry_types,
    expected_geometry_type,
    use_arrow,
):
    input_gdf = read_dataframe(naturalearth_lowres)

    output_path = tmp_path / f"test_promote{ext}"
    write_dataframe(
        input_gdf, output_path, use_arrow=use_arrow, promote_to_multi=promote_to_multi
    )

    assert output_path.exists()
    output_gdf = read_dataframe(output_path)
    geometry_types = sorted(output_gdf.geometry.type.unique())
    assert geometry_types == expected_geometry_types
    assert read_info(output_path)["geometry_type"] == expected_geometry_type


@pytest.mark.parametrize(
    "ext, promote_to_multi, geometry_type, "
    "expected_geometry_types, expected_geometry_type",
    [
        (".fgb", None, "Unknown", ["MultiPolygon"], "Unknown"),
        (".geojson", False, "Unknown", ["MultiPolygon", "Polygon"], "Unknown"),
        (".geojson", None, "Unknown", ["MultiPolygon", "Polygon"], "Unknown"),
        (".geojson", None, "Polygon", ["MultiPolygon", "Polygon"], "Unknown"),
        (".geojson", None, "MultiPolygon", ["MultiPolygon", "Polygon"], "Unknown"),
        (".geojson", None, "Point", ["MultiPolygon", "Polygon"], "Unknown"),
        (".geojson", True, "Unknown", ["MultiPolygon"], "MultiPolygon"),
        (".gpkg", False, "Unknown", ["MultiPolygon", "Polygon"], "Unknown"),
        (".gpkg", None, "Unknown", ["MultiPolygon"], "Unknown"),
        (".gpkg", None, "Polygon", ["MultiPolygon"], "Polygon"),
        (".gpkg", None, "MultiPolygon", ["MultiPolygon"], "MultiPolygon"),
        (".gpkg", None, "Point", ["MultiPolygon"], "Point"),
        (".gpkg", True, "Unknown", ["MultiPolygon"], "Unknown"),
        (".shp", False, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
        (".shp", None, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
        (".shp", None, "Polygon", ["MultiPolygon", "Polygon"], "Polygon"),
        (".shp", None, "MultiPolygon", ["MultiPolygon", "Polygon"], "Polygon"),
        (".shp", True, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
    ],
)
@pytest.mark.requires_arrow_write_api
def test_write_dataframe_promote_to_multi_layer_geom_type(
    tmp_path,
    naturalearth_lowres,
    ext,
    promote_to_multi,
    geometry_type,
    expected_geometry_types,
    expected_geometry_type,
    use_arrow,
):
    input_gdf = read_dataframe(naturalearth_lowres)

    output_path = tmp_path / f"test_promote_layer_geom_type{ext}"

    if ext == ".gpkg" and geometry_type in ("Polygon", "Point"):
        ctx = pytest.warns(
            RuntimeWarning, match="A geometry of type MULTIPOLYGON is inserted"
        )
    else:
        ctx = contextlib.nullcontext()

    with ctx:
        write_dataframe(
            input_gdf,
            output_path,
            use_arrow=use_arrow,
            promote_to_multi=promote_to_multi,
            geometry_type=geometry_type,
        )

    assert output_path.exists()
    output_gdf = read_dataframe(output_path)
    geometry_types = sorted(output_gdf.geometry.type.unique())
    assert geometry_types == expected_geometry_types
    assert read_info(output_path)["geometry_type"] == expected_geometry_type


@pytest.mark.parametrize(
    "ext, promote_to_multi, geometry_type, expected_raises_match",
    [
        (".fgb", False, "MultiPolygon", "Mismatched geometry type"),
        (".fgb", False, "Polygon", "Mismatched geometry type"),
        (".fgb", None, "Point", "Mismatched geometry type"),
        (".fgb", None, "Polygon", "Mismatched geometry type"),
        (
            ".shp",
            None,
            "Point",
            "Could not add feature to layer at index|Error while writing batch to OGR "
            "layer",
        ),
    ],
)
@pytest.mark.requires_arrow_write_api
def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
    tmp_path,
    naturalearth_lowres,
    ext,
    promote_to_multi,
    geometry_type,
    expected_raises_match,
    use_arrow,
):
    input_gdf = read_dataframe(naturalearth_lowres)

    output_path = tmp_path / f"test{ext}"
    with pytest.raises((FeatureError, DataLayerError), match=expected_raises_match):
        write_dataframe(
            input_gdf,
            output_path,
            use_arrow=use_arrow,
            promote_to_multi=promote_to_multi,
            geometry_type=geometry_type,
        )


@pytest.mark.requires_arrow_write_api
def test_write_dataframe_layer_geom_type_invalid(
    tmp_path, naturalearth_lowres, use_arrow
):
    df = read_dataframe(naturalearth_lowres)

    filename = tmp_path / "test.geojson"
    with pytest.raises(
        GeometryError, match="Geometry type is not supported: NotSupported"
    ):
        write_dataframe(df, filename, use_arrow=use_arrow, geometry_type="NotSupported")


@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".shp"])
@pytest.mark.requires_arrow_write_api
def test_write_dataframe_truly_mixed(tmp_path, ext, use_arrow):
    geometry = [
        shapely.Point(0, 0),
        shapely.LineString([(0, 0), (1, 1)]),
        shapely.box(0, 0, 1, 1),
        shapely.MultiPoint([shapely.Point(1, 1), shapely.Point(2, 2)]),
        shapely.MultiLineString(
            [shapely.LineString([(1, 1), (2, 2)]), shapely.LineString([(2, 2), (3, 3)])]
        ),
        shapely.MultiPolygon([shapely.box(1, 1, 2, 2), shapely.box(2, 2, 3, 3)]),
    ]

    df = gp.GeoDataFrame(
        {"col": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]}, geometry=geometry, crs="EPSG:4326"
    )

    filename = tmp_path / f"test{ext}"

    if ext == ".fgb":
        # For .fgb, spatial_index=False to avoid the rows being reordered
        write_dataframe(df, filename, use_arrow=use_arrow, spatial_index=False)
    else:
        write_dataframe(df, filename, use_arrow=use_arrow)

    # Drivers that support mixed geometries will default to "Unknown" geometry type
    assert read_info(filename)["geometry_type"] == "Unknown"
    result = read_dataframe(filename)
    assert_geodataframe_equal(result, df, check_geom_type=True)


@pytest.mark.requires_arrow_write_api
def test_write_dataframe_truly_mixed_invalid(tmp_path, use_arrow):
    # Shapefile doesn't support generic "Geometry" / "Unknown" type
    # for mixed geometries

    df = gp.GeoDataFrame(
        {"col": [1.0, 2.0, 3.0]},
        geometry=[
            shapely.Point(0, 0),
            shapely.LineString([(0, 0), (1, 1)]),
            shapely.box(0, 0, 1, 1),
        ],
        crs="EPSG:4326",
    )

    # ensure error message from GDAL is included
    msg = (
        "Could not add feature to layer at index 1: Attempt to "
        r"write non-point \(LINESTRING\) geometry to point shapefile."
        # DataLayerError when using Arrow
        "|Error while writing batch to OGR layer: Attempt to "
        r"write non-point \(LINESTRING\) geometry to point shapefile."
    )
    with pytest.raises((FeatureError, DataLayerError), match=msg):
        write_dataframe(df, tmp_path / "test.shp", use_arrow=use_arrow)


@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".fgb"])
@pytest.mark.parametrize(
    "geoms",
    [
        [None, shapely.Point(1, 1)],
        [shapely.Point(1, 1), None],
        [None, shapely.Point(1, 1, 2)],
        [None, None],
    ],
)
@pytest.mark.requires_arrow_write_api
def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext, use_arrow):
    filename = tmp_path / f"test{ext}"

    df = gp.GeoDataFrame({"col": [1.0, 2.0]}, geometry=geoms, crs="EPSG:4326")
    write_dataframe(df, filename, use_arrow=use_arrow)
    result = read_dataframe(filename)
    assert_geodataframe_equal(result, df)


@pytest.mark.filterwarnings(
    "ignore: You will likely lose important projection information"
)
@pytest.mark.requires_arrow_write_api
@requires_pyproj
def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
    df = read_dataframe(naturalearth_lowres_all_ext)
    # project Belgium to a custom Albers Equal Area projection
    expected = (
        df.loc[df.name == "Belgium"]
        .reset_index(drop=True)
        .to_crs("+proj=aea +lat_1=49.5 +lat_2=51.5 +lon_0=4.3")
    )
    filename = tmp_path / "test.shp"
    write_dataframe(expected, filename, use_arrow=use_arrow)

    assert filename.exists()

    df = read_dataframe(filename)

    crs = df.crs.to_dict()
    assert crs["lat_1"] == 49.5
    assert crs["lat_2"] == 51.5
    assert crs["lon_0"] == 4.3
    assert df.crs.equals(expected.crs)


def test_write_read_mixed_column_values(tmp_path):
    # use_arrow=True is tested separately below
    mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
    geoms = [shapely.Point(0, 0) for _ in mixed_values]
    test_gdf = gp.GeoDataFrame(
        {"geometry": geoms, "mixed": mixed_values}, crs="epsg:31370"
    )
    output_path = tmp_path / "test_write_mixed_column.gpkg"
    write_dataframe(test_gdf, output_path)
    output_gdf = read_dataframe(output_path)
    assert len(test_gdf) == len(output_gdf)
    for idx, value in enumerate(mixed_values):
        if value in (None, np.nan):
            assert output_gdf["mixed"][idx] is None
        else:
            assert output_gdf["mixed"][idx] == str(value)


@requires_arrow_write_api
def test_write_read_mixed_column_values_arrow(tmp_path):
    # Arrow cannot represent a column of mixed types
    mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
    geoms = [shapely.Point(0, 0) for _ in mixed_values]
    test_gdf = gp.GeoDataFrame(
        {"geometry": geoms, "mixed": mixed_values}, crs="epsg:31370"
    )
    output_path = tmp_path / "test_write_mixed_column.gpkg"
    with pytest.raises(TypeError, match=".*Conversion failed for column"):
        write_dataframe(test_gdf, output_path, use_arrow=True)


@pytest.mark.requires_arrow_write_api
def test_write_read_null(tmp_path, use_arrow):
    output_path = tmp_path / "test_write_nan.gpkg"
    geom = shapely.Point(0, 0)
    test_data = {
        "geometry": [geom, geom, geom],
        "float64": [1.0, None, np.nan],
        "object_str": ["test", None, np.nan],
    }
    test_gdf = gp.GeoDataFrame(test_data, crs="epsg:31370")
    write_dataframe(test_gdf, output_path, use_arrow=use_arrow)
    result_gdf = read_dataframe(output_path)
    assert len(test_gdf) == len(result_gdf)
    assert result_gdf["float64"][0] == 1.0
    assert pd.isna(result_gdf["float64"][1])
    assert pd.isna(result_gdf["float64"][2])
    assert result_gdf["object_str"][0] == "test"
    assert result_gdf["object_str"][1] is None
    assert result_gdf["object_str"][2] is None


@pytest.mark.requires_arrow_write_api
def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow):
    path, _ = naturalearth_lowres_vsi
    mem_path = f"/vsimem/{path.name}"

    input = read_dataframe(path, use_arrow=use_arrow)
    assert len(input) == 177

    try:
        write_dataframe(input, mem_path, use_arrow=use_arrow)
        result = read_dataframe(mem_path, use_arrow=use_arrow)
        assert len(result) == 177
    finally:
        vsi_unlink(mem_path)


@pytest.mark.parametrize(
    "wkt,geom_types",
    [
        ("Point Z (0 0 0)", ["2.5D Point", "Point Z"]),
        ("LineString Z (0 0 0, 1 1 0)", ["2.5D LineString", "LineString Z"]),
        ("Polygon Z ((0 0 0, 0 1 0, 1 1 0, 0 0 0))", ["2.5D Polygon", "Polygon Z"]),
        ("MultiPoint Z (0 0 0, 1 1 0)", ["2.5D MultiPoint", "MultiPoint Z"]),
        (
            "MultiLineString Z ((0 0 0, 1 1 0), (2 2 2, 3 3 2))",
            ["2.5D MultiLineString", "MultiLineString Z"],
        ),
        (
            "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))",  # noqa: E501
            ["2.5D MultiPolygon", "MultiPolygon Z"],
        ),
        (
            "GeometryCollection Z (Point Z (0 0 0))",
            ["2.5D GeometryCollection", "GeometryCollection Z"],
        ),
    ],
)
@pytest.mark.requires_arrow_write_api
def test_write_geometry_z_types(tmp_path, wkt, geom_types, use_arrow):
    filename = tmp_path / "test.fgb"
    gdf = gp.GeoDataFrame(geometry=from_wkt([wkt]), crs="EPSG:4326")
    for geom_type in geom_types:
        write_dataframe(gdf, filename, use_arrow=use_arrow, geometry_type=geom_type)
        df = read_dataframe(filename)
        assert_geodataframe_equal(df, gdf)


@pytest.mark.parametrize("ext", ALL_EXTS)
|
|
1624
|
+
@pytest.mark.parametrize(
|
|
1625
|
+
"test_descr, exp_geometry_type, mixed_dimensions, wkt",
|
|
1626
|
+
[
|
|
1627
|
+
("1 Point Z", "Point Z", False, ["Point Z (0 0 0)"]),
|
|
1628
|
+
("1 LineString Z", "LineString Z", False, ["LineString Z (0 0 0, 1 1 0)"]),
|
|
1629
|
+
(
|
|
1630
|
+
"1 Polygon Z",
|
|
1631
|
+
"Polygon Z",
|
|
1632
|
+
False,
|
|
1633
|
+
["Polygon Z ((0 0 0, 0 1 0, 1 1 0, 0 0 0))"],
|
|
1634
|
+
),
|
|
1635
|
+
("1 MultiPoint Z", "MultiPoint Z", False, ["MultiPoint Z (0 0 0, 1 1 0)"]),
|
|
1636
|
+
(
|
|
1637
|
+
"1 MultiLineString Z",
|
|
1638
|
+
"MultiLineString Z",
|
|
1639
|
+
False,
|
|
1640
|
+
["MultiLineString Z ((0 0 0, 1 1 0), (2 2 2, 3 3 2))"],
|
|
1641
|
+
),
|
|
1642
|
+
(
|
|
1643
|
+
"1 MultiLinePolygon Z",
|
|
1644
|
+
"MultiPolygon Z",
|
|
1645
|
+
False,
|
|
1646
|
+
[
|
|
1647
|
+
"MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))" # noqa: E501
|
|
1648
|
+
],
|
|
1649
|
+
),
|
|
1650
|
+
(
|
|
1651
|
+
"1 GeometryCollection Z",
|
|
1652
|
+
"GeometryCollection Z",
|
|
1653
|
+
False,
|
|
1654
|
+
["GeometryCollection Z (Point Z (0 0 0))"],
|
|
1655
|
+
),
|
|
1656
|
+
("Point Z + Point", "Point Z", True, ["Point Z (0 0 0)", "Point (0 0)"]),
|
|
1657
|
+
("Point Z + None", "Point Z", False, ["Point Z (0 0 0)", None]),
|
|
1658
|
+
(
|
|
1659
|
+
"Point Z + LineString Z",
|
|
1660
|
+
"Unknown",
|
|
1661
|
+
False,
|
|
1662
|
+
["LineString Z (0 0 0, 1 1 0)", "Point Z (0 0 0)"],
|
|
1663
|
+
),
|
|
1664
|
+
(
|
|
1665
|
+
"Point Z + LineString",
|
|
1666
|
+
"Unknown",
|
|
1667
|
+
True,
|
|
1668
|
+
["LineString (0 0, 1 1)", "Point Z (0 0 0)"],
|
|
1669
|
+
),
|
|
1670
|
+
],
|
|
1671
|
+
)
|
|
1672
|
+
@pytest.mark.requires_arrow_write_api
|
|
1673
|
+
def test_write_geometry_z_types_auto(
|
|
1674
|
+
tmp_path, ext, test_descr, exp_geometry_type, mixed_dimensions, wkt, use_arrow
|
|
1675
|
+
):
|
|
1676
|
+
# Shapefile has some different behaviour that other file types
|
|
1677
|
+
if ext == ".shp":
|
|
1678
|
+
if exp_geometry_type in ("GeometryCollection Z", "Unknown"):
|
|
1679
|
+
pytest.skip(f"ext {ext} doesn't support {exp_geometry_type}")
|
|
1680
|
+
elif exp_geometry_type == "MultiLineString Z":
|
|
1681
|
+
exp_geometry_type = "LineString Z"
|
|
1682
|
+
elif exp_geometry_type == "MultiPolygon Z":
|
|
1683
|
+
exp_geometry_type = "Polygon Z"
|
|
1684
|
+
|
|
1685
|
+
column_data = {}
|
|
1686
|
+
column_data["test_descr"] = [test_descr] * len(wkt)
|
|
1687
|
+
column_data["idx"] = [str(idx) for idx in range(len(wkt))]
|
|
1688
|
+
gdf = gp.GeoDataFrame(column_data, geometry=from_wkt(wkt), crs="EPSG:4326")
|
|
1689
|
+
filename = tmp_path / f"test{ext}"
|
|
1690
|
+
|
|
1691
|
+
if ext == ".fgb":
|
|
1692
|
+
# writing empty / null geometries not allowed by FlatGeobuf for
|
|
1693
|
+
# GDAL >= 3.6.4 and were simply not written previously
|
|
1694
|
+
gdf = gdf.loc[~(gdf.geometry.isna() | gdf.geometry.is_empty)]
|
|
1695
|
+
|
|
1696
|
+
if mixed_dimensions and DRIVERS[ext] in DRIVERS_NO_MIXED_DIMENSIONS:
|
|
1697
|
+
with pytest.raises(
|
|
1698
|
+
DataSourceError,
|
|
1699
|
+
match=("Mixed 2D and 3D coordinates are not supported by"),
|
|
1700
|
+
):
|
|
1701
|
+
write_dataframe(gdf, filename, use_arrow=use_arrow)
|
|
1702
|
+
return
|
|
1703
|
+
else:
|
|
1704
|
+
write_dataframe(gdf, filename, use_arrow=use_arrow)
|
|
1705
|
+
|
|
1706
|
+
info = read_info(filename)
|
|
1707
|
+
assert info["geometry_type"] == exp_geometry_type
|
|
1708
|
+
|
|
1709
|
+
result_gdf = read_dataframe(filename)
|
|
1710
|
+
if ext == ".geojsonl":
|
|
1711
|
+
result_gdf.crs = "EPSG:4326"
|
|
1712
|
+
|
|
1713
|
+
assert_geodataframe_equal(gdf, result_gdf)
|
|
1714
|
+
|
|
1715
|
+
|
|
1716
|
+
@pytest.mark.parametrize(
    "on_invalid, message",
    [
        (
            "warn",
            "Invalid WKB: geometry is returned as None. IllegalArgumentException: "
            "Invalid number of points in LinearRing found 2 - must be 0 or >=",
        ),
        ("raise", "Invalid number of points in LinearRing found 2 - must be 0 or >="),
        ("ignore", None),
    ],
)
def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
    if on_invalid == "raise":
        handler = pytest.raises(shapely.errors.GEOSException, match=message)
    elif on_invalid == "warn":
        handler = pytest.warns(match=message)
    elif on_invalid == "ignore":
        handler = contextlib.nullcontext()
    else:
        raise ValueError(f"unknown value for on_invalid: {on_invalid}")

    # create a GeoJSON file with an invalid exterior ring
    invalid_geojson = """{
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "properties": {},
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [ [ [0, 0], [0, 0] ] ]
                }
            }
        ]
    }"""

    filename = tmp_path / "test.geojson"
    with open(filename, "w") as f:
        _ = f.write(invalid_geojson)

    with handler:
        df = read_dataframe(
            filename,
            use_arrow=use_arrow,
            on_invalid=on_invalid,
        )
        assert df.geometry.isnull().all()


def test_read_multisurface(multisurface_file, use_arrow):
    if use_arrow:
        # TODO: revisit once https://github.com/geopandas/pyogrio/issues/478
        # is resolved.
        pytest.skip("Shapely + GEOS 3.13 crashes in from_wkb for this case")

        with pytest.raises(shapely.errors.GEOSException):
            # TODO(Arrow)
            # shapely fails parsing the WKB
            read_dataframe(multisurface_file, use_arrow=True)
    else:
        df = read_dataframe(multisurface_file)

        # MultiSurface should be converted to MultiPolygon
        assert df.geometry.type.tolist() == ["MultiPolygon"]


def test_read_dataset_kwargs(nested_geojson_file, use_arrow):
    # by default, nested data are not flattened
    df = read_dataframe(nested_geojson_file, use_arrow=use_arrow)

    expected = gp.GeoDataFrame(
        {
            "top_level": ["A"],
            "intermediate_level": ['{ "bottom_level": "B" }'],
        },
        geometry=[shapely.Point(0, 0)],
        crs="EPSG:4326",
    )

    assert_geodataframe_equal(df, expected)

    df = read_dataframe(
        nested_geojson_file, use_arrow=use_arrow, FLATTEN_NESTED_ATTRIBUTES="YES"
    )

    expected = gp.GeoDataFrame(
        {
            "top_level": ["A"],
            "intermediate_level_bottom_level": ["B"],
        },
        geometry=[shapely.Point(0, 0)],
        crs="EPSG:4326",
    )

    assert_geodataframe_equal(df, expected)


def test_read_invalid_dataset_kwargs(naturalearth_lowres, use_arrow):
    with pytest.warns(RuntimeWarning, match="does not support open option INVALID"):
        read_dataframe(naturalearth_lowres, use_arrow=use_arrow, INVALID="YES")


@pytest.mark.requires_arrow_write_api
def test_write_nullable_dtypes(tmp_path, use_arrow):
    path = tmp_path / "test_nullable_dtypes.gpkg"
    test_data = {
        "col1": pd.Series([1, 2, 3], dtype="int64"),
        "col2": pd.Series([1, 2, None], dtype="Int64"),
        "col3": pd.Series([0.1, None, 0.3], dtype="Float32"),
        "col4": pd.Series([True, False, None], dtype="boolean"),
        "col5": pd.Series(["a", None, "b"], dtype="string"),
    }
    input_gdf = gp.GeoDataFrame(
        test_data, geometry=[shapely.Point(0, 0)] * 3, crs="epsg:31370"
    )
    write_dataframe(input_gdf, path, use_arrow=use_arrow)
    output_gdf = read_dataframe(path)
    # We read it back as default (non-nullable) numpy dtypes, so we cast
    # to those for the expected result
    expected = input_gdf.copy()
    expected["col2"] = expected["col2"].astype("float64")
    expected["col3"] = expected["col3"].astype("float32")
    expected["col4"] = expected["col4"].astype("float64")
    expected["col5"] = expected["col5"].astype(object)
    expected.loc[1, "col5"] = None  # pandas converts to pd.NA on line above
    assert_geodataframe_equal(output_gdf, expected)


@pytest.mark.parametrize(
    "metadata_type", ["dataset_metadata", "layer_metadata", "metadata"]
)
@pytest.mark.requires_arrow_write_api
def test_metadata_io(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
    metadata = {"level": metadata_type}

    df = read_dataframe(naturalearth_lowres)

    filename = tmp_path / "test.gpkg"
    write_dataframe(df, filename, use_arrow=use_arrow, **{metadata_type: metadata})

    metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type

    assert read_info(filename)[metadata_key] == metadata


@pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
|
|
1863
|
+
@pytest.mark.parametrize(
|
|
1864
|
+
"metadata",
|
|
1865
|
+
[
|
|
1866
|
+
{1: 2},
|
|
1867
|
+
{"key": None},
|
|
1868
|
+
{"key": 1},
|
|
1869
|
+
],
|
|
1870
|
+
)
|
|
1871
|
+
@pytest.mark.requires_arrow_write_api
|
|
1872
|
+
def test_invalid_metadata(
|
|
1873
|
+
tmp_path, naturalearth_lowres, metadata_type, metadata, use_arrow
|
|
1874
|
+
):
|
|
1875
|
+
df = read_dataframe(naturalearth_lowres)
|
|
1876
|
+
with pytest.raises(ValueError, match="must be a string"):
|
|
1877
|
+
write_dataframe(
|
|
1878
|
+
df, tmp_path / "test.gpkg", use_arrow=use_arrow, **{metadata_type: metadata}
|
|
1879
|
+
)
|
|
1880
|
+
|
|
1881
|
+
|
|
1882
|
+
@pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
|
|
1883
|
+
@pytest.mark.requires_arrow_write_api
|
|
1884
|
+
def test_metadata_unsupported(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
|
|
1885
|
+
"""metadata is silently ignored"""
|
|
1886
|
+
|
|
1887
|
+
filename = tmp_path / "test.geojson"
|
|
1888
|
+
write_dataframe(
|
|
1889
|
+
read_dataframe(naturalearth_lowres),
|
|
1890
|
+
filename,
|
|
1891
|
+
use_arrow=use_arrow,
|
|
1892
|
+
**{metadata_type: {"key": "value"}},
|
|
1893
|
+
)
|
|
1894
|
+
|
|
1895
|
+
metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type
|
|
1896
|
+
|
|
1897
|
+
assert read_info(filename)[metadata_key] is None
|
|
1898
|
+
|
|
1899
|
+
|
|
1900
|
+
@pytest.mark.skipif(not PANDAS_GE_15, reason="ArrowDtype requires pandas 1.5+")
def test_read_dataframe_arrow_dtypes(tmp_path):
    # https://github.com/geopandas/pyogrio/issues/319 - ensure arrow binary
    # column can be converted with from_wkb in case of missing values
    pytest.importorskip("pyarrow")
    filename = tmp_path / "test.gpkg"
    df = gp.GeoDataFrame(
        {"col": [1.0, 2.0]}, geometry=[Point(1, 1), None], crs="EPSG:4326"
    )
    write_dataframe(df, filename)

    result = read_dataframe(
        filename,
        use_arrow=True,
        arrow_to_pandas_kwargs={
            "types_mapper": lambda pa_dtype: pd.ArrowDtype(pa_dtype)
        },
    )
    assert isinstance(result["col"].dtype, pd.ArrowDtype)
    result["col"] = result["col"].astype("float64")
    assert_geodataframe_equal(result, df)


@requires_pyarrow_api
@pytest.mark.skipif(
    __gdal_version__ < (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
)
@pytest.mark.parametrize("ext", ALL_EXTS)
def test_arrow_bool_roundtrip(tmp_path, ext):
    filename = tmp_path / f"test{ext}"

    kwargs = {}

    if ext == ".fgb":
        # For .fgb, spatial_index=False to avoid the rows being reordered
        kwargs["spatial_index"] = False

    df = gp.GeoDataFrame(
        {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
        crs="EPSG:4326",
    )

    write_dataframe(df, filename, **kwargs)
    result = read_dataframe(filename, use_arrow=True)
    # Shapefiles do not support bool columns; these are returned as int32
    assert_geodataframe_equal(result, df, check_dtype=ext != ".shp")


@requires_pyarrow_api
@pytest.mark.skipif(
    __gdal_version__ >= (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
)
@pytest.mark.parametrize("ext", ALL_EXTS)
def test_arrow_bool_exception(tmp_path, ext):
    filename = tmp_path / f"test{ext}"

    df = gp.GeoDataFrame(
        {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
        crs="EPSG:4326",
    )

    write_dataframe(df, filename)

    if ext in {".fgb", ".gpkg"}:
        # only raise exception for GPKG / FGB
        with pytest.raises(
            RuntimeError,
            match="GDAL < 3.8.3 does not correctly read boolean data values using "
            "the Arrow API",
        ):
            read_dataframe(filename, use_arrow=True)

        # do not raise exception if no bool columns are read
        read_dataframe(filename, use_arrow=True, columns=[])

    else:
        _ = read_dataframe(filename, use_arrow=True)


@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
|
|
1980
|
+
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
|
|
1981
|
+
def test_write_memory(naturalearth_lowres, driver):
|
|
1982
|
+
df = read_dataframe(naturalearth_lowres)
|
|
1983
|
+
|
|
1984
|
+
buffer = BytesIO()
|
|
1985
|
+
write_dataframe(df, buffer, driver=driver, layer="test")
|
|
1986
|
+
|
|
1987
|
+
assert len(buffer.getbuffer()) > 0
|
|
1988
|
+
|
|
1989
|
+
actual = read_dataframe(buffer)
|
|
1990
|
+
assert len(actual) == len(df)
|
|
1991
|
+
|
|
1992
|
+
is_json = driver == "GeoJSON"
|
|
1993
|
+
|
|
1994
|
+
assert_geodataframe_equal(
|
|
1995
|
+
actual,
|
|
1996
|
+
df,
|
|
1997
|
+
check_less_precise=is_json,
|
|
1998
|
+
check_index_type=False,
|
|
1999
|
+
check_dtype=not is_json,
|
|
2000
|
+
)
|
|
2001
|
+
|
|
2002
|
+
# Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
|
|
2003
|
+
assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
|
|
2004
|
+
|
|
2005
|
+
|
|
2006
|
+
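Aside: the same in-memory round trip sketched outside pytest. The explicit driver is required because an in-memory target has no file extension to infer it from (the next test asserts exactly that):

    from io import BytesIO

    import geopandas as gp
    from shapely.geometry import Point
    from pyogrio import read_dataframe, write_dataframe

    df = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
    buffer = BytesIO()
    write_dataframe(df, buffer, driver="GPKG", layer="test")  # driver is mandatory here
    roundtripped = read_dataframe(buffer)
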
def test_write_memory_driver_required(naturalearth_lowres):
    df = read_dataframe(naturalearth_lowres)

    buffer = BytesIO()

    with pytest.raises(
        ValueError,
        match="driver must be provided to write to in-memory file",
    ):
        write_dataframe(df.head(1), buffer, driver=None, layer="test")

    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []


@pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
    if driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
        pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")

    df = read_dataframe(naturalearth_lowres)

    buffer = BytesIO()

    with pytest.raises(
        ValueError, match=f"writing to in-memory file is not supported for {driver}"
    ):
        write_dataframe(df, buffer, driver=driver, layer="test")

    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []


@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory_append_unsupported(naturalearth_lowres, driver):
    df = read_dataframe(naturalearth_lowres)

    buffer = BytesIO()

    with pytest.raises(
        NotImplementedError, match="append is not supported for in-memory files"
    ):
        write_dataframe(df.head(1), buffer, driver=driver, layer="test", append=True)

    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []


def test_write_memory_existing_unsupported(naturalearth_lowres):
    df = read_dataframe(naturalearth_lowres)

    buffer = BytesIO(b"0000")
    with pytest.raises(
        NotImplementedError,
        match="writing to existing in-memory object is not supported",
    ):
        write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test")

    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []


def test_write_open_file_handle(tmp_path, naturalearth_lowres):
    """Verify that writing to an open file handle is not currently supported"""

    df = read_dataframe(naturalearth_lowres)

    # verify it fails for regular file handle
    with pytest.raises(
        NotImplementedError, match="writing to an open file handle is not yet supported"
    ):
        with open(tmp_path / "test.geojson", "wb") as f:
            write_dataframe(df.head(1), f)

    # verify it fails for ZipFile
    with pytest.raises(
        NotImplementedError, match="writing to an open file handle is not yet supported"
    ):
        with ZipFile(tmp_path / "test.geojson.zip", "w") as z:
            with z.open("test.geojson", "w") as f:
                write_dataframe(df.head(1), f)

    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []


@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
|
|
2093
|
+
def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
|
|
2094
|
+
"""Verify that we write non-UTF data to the data source
|
|
2095
|
+
|
|
2096
|
+
IMPORTANT: this may not be valid for the data source and will likely render
|
|
2097
|
+
them unusable in other tools, but should successfully roundtrip unless we
|
|
2098
|
+
disable writing using other encodings.
|
|
2099
|
+
|
|
2100
|
+
NOTE: FlatGeobuff driver cannot handle non-UTF data in GDAL >= 3.9
|
|
2101
|
+
|
|
2102
|
+
NOTE: pyarrow cannot handle non-UTF-8 characters in this way
|
|
2103
|
+
"""
|
|
2104
|
+
|
|
2105
|
+
encoding, text = encoded_text
|
|
2106
|
+
output_path = tmp_path / f"test.{ext}"
|
|
2107
|
+
|
|
2108
|
+
df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
|
|
2109
|
+
write_dataframe(df, output_path, encoding=encoding)
|
|
2110
|
+
|
|
2111
|
+
# cannot open these files without specifying encoding
|
|
2112
|
+
with pytest.raises(UnicodeDecodeError):
|
|
2113
|
+
read_dataframe(output_path)
|
|
2114
|
+
|
|
2115
|
+
# must provide encoding to read these properly
|
|
2116
|
+
actual = read_dataframe(output_path, encoding=encoding)
|
|
2117
|
+
assert actual.columns[0] == text
|
|
2118
|
+
assert actual[text].values[0] == text
|
|
2119
|
+
|
|
2120
|
+
|
|
2121
|
+
@requires_pyarrow_api
@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
def test_non_utf8_encoding_io_arrow_exception(tmp_path, ext, encoded_text):
    encoding, text = encoded_text
    output_path = tmp_path / f"test.{ext}"

    df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
    write_dataframe(df, output_path, encoding=encoding)

    # cannot open these files without specifying the encoding
    with pytest.raises(UnicodeDecodeError):
        read_dataframe(output_path)

    with pytest.raises(
        ValueError, match="non-UTF-8 encoding is not supported for Arrow"
    ):
        read_dataframe(output_path, encoding=encoding, use_arrow=True)


def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):
    encoding, text = encoded_text

    output_path = tmp_path / "test.shp"

    df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
    write_dataframe(df, output_path, encoding=encoding)

    # NOTE: GDAL automatically creates a cpg file with the encoding name, which
    # means that if we read this without specifying the encoding it uses the
    # correct one
    actual = read_dataframe(output_path, use_arrow=use_arrow)
    assert actual.columns[0] == text
    assert actual[text].values[0] == text

    # verify that if the cpg file is not present, the user-provided encoding
    # must be used
    output_path.with_suffix(".cpg").unlink()

    # We will assume ISO-8859-1, which is wrong
    miscoded = text.encode(encoding).decode("ISO-8859-1")

    if use_arrow:
        # pyarrow cannot decode a column name with an incorrect encoding
        with pytest.raises(UnicodeDecodeError):
            read_dataframe(output_path, use_arrow=True)
    else:
        bad = read_dataframe(output_path, use_arrow=False)
        assert bad.columns[0] == miscoded
        assert bad[miscoded].values[0] == miscoded

    # If the encoding is provided, that should yield correct text
    actual = read_dataframe(output_path, encoding=encoding, use_arrow=use_arrow)
    assert actual.columns[0] == text
    assert actual[text].values[0] == text

    # if the ENCODING open option is provided, that should also yield correct text
    actual = read_dataframe(output_path, use_arrow=use_arrow, ENCODING=encoding)
    assert actual.columns[0] == text
    assert actual[text].values[0] == text


def test_encoding_read_option_collision_shapefile(naturalearth_lowres, use_arrow):
    """Providing both the encoding parameter and the ENCODING open option
    (even if blank) is not allowed."""

    with pytest.raises(
        ValueError, match='cannot provide both encoding parameter and "ENCODING" option'
    ):
        read_dataframe(
            naturalearth_lowres, encoding="CP936", ENCODING="", use_arrow=use_arrow
        )


def test_encoding_write_layer_option_collision_shapefile(tmp_path, encoded_text):
    """Providing both the encoding parameter and the ENCODING layer creation
    option (even if blank) is not allowed."""
    encoding, text = encoded_text

    output_path = tmp_path / "test.shp"
    df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")

    with pytest.raises(
        ValueError,
        match=(
            'cannot provide both encoding parameter and "ENCODING" layer creation '
            "option"
        ),
    ):
        write_dataframe(
            df, output_path, encoding=encoding, layer_options={"ENCODING": ""}
        )


def test_non_utf8_encoding_shapefile_sql(tmp_path, use_arrow):
    encoding = "CP936"

    output_path = tmp_path / "test.shp"

    mandarin = "中文"
    df = gp.GeoDataFrame(
        {mandarin: mandarin, "geometry": [Point(0, 0)]}, crs="EPSG:4326"
    )
    write_dataframe(df, output_path, encoding=encoding)

    actual = read_dataframe(
        output_path,
        sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
        use_arrow=use_arrow,
    )
    assert actual.columns[0] == mandarin
    assert actual[mandarin].values[0] == mandarin

    actual = read_dataframe(
        output_path,
        sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
        encoding=encoding,
        use_arrow=use_arrow,
    )
    assert actual.columns[0] == mandarin
    assert actual[mandarin].values[0] == mandarin


@pytest.mark.requires_arrow_write_api
def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
    # confirm KML coordinates are written in lon, lat order even if the CRS axis
    # order specifies otherwise
    points = [Point(10, 20), Point(30, 40), Point(50, 60)]
    gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
    output_path = tmp_path / "test.kml"
    write_dataframe(
        gdf, output_path, layer="tmp_layer", driver="KML", use_arrow=use_arrow
    )

    gdf_in = read_dataframe(output_path, use_arrow=use_arrow)

    assert np.array_equal(gdf_in.geometry.values, points)

    if "LIBKML" in list_drivers():
        # test appending to the existing file only if LIBKML is available,
        # as GDAL appears to fall back on the LIBKML driver when appending.
        points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
        gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")

        write_dataframe(
            gdf_append,
            output_path,
            layer="tmp_layer",
            driver="KML",
            use_arrow=use_arrow,
            append=True,
        )
        # force_2d is used to only compare xy geometry, as a z-dimension is
        # undesirably introduced when the kml file is over-written.
        gdf_in_appended = read_dataframe(
            output_path, use_arrow=use_arrow, force_2d=True
        )

        assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)


@pytest.mark.requires_arrow_write_api
def test_write_geojson_rfc7946_coordinates(tmp_path, use_arrow):
    points = [Point(10, 20), Point(30, 40), Point(50, 60)]
    gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
    output_path = tmp_path / "test.geojson"
    write_dataframe(
        gdf,
        output_path,
        layer="tmp_layer",
        driver="GeoJSON",
        RFC7946=True,
        use_arrow=use_arrow,
    )

    gdf_in = read_dataframe(output_path, use_arrow=use_arrow)

    assert np.array_equal(gdf_in.geometry.values, points)

    # test appending to the existing file
    points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
    gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")

    write_dataframe(
        gdf_append,
        output_path,
        layer="tmp_layer",
        driver="GeoJSON",
        RFC7946=True,
        use_arrow=use_arrow,
        append=True,
    )

    gdf_in_appended = read_dataframe(output_path, use_arrow=use_arrow)
    assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)