pyogrio 0.12.0__cp314-cp314t-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. pyogrio/.dylibs/libgdal.37.3.11.4.dylib +0 -0
  2. pyogrio/__init__.py +57 -0
  3. pyogrio/_compat.py +54 -0
  4. pyogrio/_env.py +59 -0
  5. pyogrio/_err.cpython-314t-darwin.so +0 -0
  6. pyogrio/_geometry.cpython-314t-darwin.so +0 -0
  7. pyogrio/_io.cpython-314t-darwin.so +0 -0
  8. pyogrio/_ogr.cpython-314t-darwin.so +0 -0
  9. pyogrio/_version.py +21 -0
  10. pyogrio/_vsi.cpython-314t-darwin.so +0 -0
  11. pyogrio/core.py +387 -0
  12. pyogrio/errors.py +25 -0
  13. pyogrio/gdal_data/GDAL-targets-release.cmake +19 -0
  14. pyogrio/gdal_data/GDAL-targets.cmake +106 -0
  15. pyogrio/gdal_data/GDALConfig.cmake +24 -0
  16. pyogrio/gdal_data/GDALConfigVersion.cmake +65 -0
  17. pyogrio/gdal_data/GDALLogoBW.svg +138 -0
  18. pyogrio/gdal_data/GDALLogoColor.svg +126 -0
  19. pyogrio/gdal_data/GDALLogoGS.svg +126 -0
  20. pyogrio/gdal_data/LICENSE.TXT +467 -0
  21. pyogrio/gdal_data/MM_m_idofic.csv +321 -0
  22. pyogrio/gdal_data/copyright +467 -0
  23. pyogrio/gdal_data/cubewerx_extra.wkt +48 -0
  24. pyogrio/gdal_data/default.rsc +0 -0
  25. pyogrio/gdal_data/ecw_cs.wkt +1453 -0
  26. pyogrio/gdal_data/eedaconf.json +23 -0
  27. pyogrio/gdal_data/epsg.wkt +1 -0
  28. pyogrio/gdal_data/esri_StatePlane_extra.wkt +631 -0
  29. pyogrio/gdal_data/gdal_algorithm.schema.json +220 -0
  30. pyogrio/gdal_data/gdalg.schema.json +36 -0
  31. pyogrio/gdal_data/gdalicon.png +0 -0
  32. pyogrio/gdal_data/gdalinfo_output.schema.json +390 -0
  33. pyogrio/gdal_data/gdalmdiminfo_output.schema.json +326 -0
  34. pyogrio/gdal_data/gdaltileindex.xsd +253 -0
  35. pyogrio/gdal_data/gdalvrt.xsd +927 -0
  36. pyogrio/gdal_data/gfs.xsd +246 -0
  37. pyogrio/gdal_data/gml_registry.xml +117 -0
  38. pyogrio/gdal_data/gml_registry.xsd +66 -0
  39. pyogrio/gdal_data/grib2_center.csv +251 -0
  40. pyogrio/gdal_data/grib2_process.csv +102 -0
  41. pyogrio/gdal_data/grib2_subcenter.csv +63 -0
  42. pyogrio/gdal_data/grib2_table_4_2_0_0.csv +261 -0
  43. pyogrio/gdal_data/grib2_table_4_2_0_1.csv +261 -0
  44. pyogrio/gdal_data/grib2_table_4_2_0_13.csv +261 -0
  45. pyogrio/gdal_data/grib2_table_4_2_0_14.csv +261 -0
  46. pyogrio/gdal_data/grib2_table_4_2_0_15.csv +261 -0
  47. pyogrio/gdal_data/grib2_table_4_2_0_16.csv +261 -0
  48. pyogrio/gdal_data/grib2_table_4_2_0_17.csv +11 -0
  49. pyogrio/gdal_data/grib2_table_4_2_0_18.csv +261 -0
  50. pyogrio/gdal_data/grib2_table_4_2_0_19.csv +261 -0
  51. pyogrio/gdal_data/grib2_table_4_2_0_190.csv +261 -0
  52. pyogrio/gdal_data/grib2_table_4_2_0_191.csv +261 -0
  53. pyogrio/gdal_data/grib2_table_4_2_0_2.csv +261 -0
  54. pyogrio/gdal_data/grib2_table_4_2_0_20.csv +261 -0
  55. pyogrio/gdal_data/grib2_table_4_2_0_21.csv +261 -0
  56. pyogrio/gdal_data/grib2_table_4_2_0_3.csv +261 -0
  57. pyogrio/gdal_data/grib2_table_4_2_0_4.csv +261 -0
  58. pyogrio/gdal_data/grib2_table_4_2_0_5.csv +261 -0
  59. pyogrio/gdal_data/grib2_table_4_2_0_6.csv +261 -0
  60. pyogrio/gdal_data/grib2_table_4_2_0_7.csv +261 -0
  61. pyogrio/gdal_data/grib2_table_4_2_10_0.csv +261 -0
  62. pyogrio/gdal_data/grib2_table_4_2_10_1.csv +261 -0
  63. pyogrio/gdal_data/grib2_table_4_2_10_191.csv +261 -0
  64. pyogrio/gdal_data/grib2_table_4_2_10_2.csv +261 -0
  65. pyogrio/gdal_data/grib2_table_4_2_10_3.csv +261 -0
  66. pyogrio/gdal_data/grib2_table_4_2_10_4.csv +261 -0
  67. pyogrio/gdal_data/grib2_table_4_2_1_0.csv +261 -0
  68. pyogrio/gdal_data/grib2_table_4_2_1_1.csv +261 -0
  69. pyogrio/gdal_data/grib2_table_4_2_1_2.csv +261 -0
  70. pyogrio/gdal_data/grib2_table_4_2_20_0.csv +261 -0
  71. pyogrio/gdal_data/grib2_table_4_2_20_1.csv +261 -0
  72. pyogrio/gdal_data/grib2_table_4_2_20_2.csv +261 -0
  73. pyogrio/gdal_data/grib2_table_4_2_2_0.csv +261 -0
  74. pyogrio/gdal_data/grib2_table_4_2_2_3.csv +261 -0
  75. pyogrio/gdal_data/grib2_table_4_2_2_4.csv +261 -0
  76. pyogrio/gdal_data/grib2_table_4_2_2_5.csv +261 -0
  77. pyogrio/gdal_data/grib2_table_4_2_2_6.csv +261 -0
  78. pyogrio/gdal_data/grib2_table_4_2_3_0.csv +261 -0
  79. pyogrio/gdal_data/grib2_table_4_2_3_1.csv +261 -0
  80. pyogrio/gdal_data/grib2_table_4_2_3_2.csv +28 -0
  81. pyogrio/gdal_data/grib2_table_4_2_3_3.csv +8 -0
  82. pyogrio/gdal_data/grib2_table_4_2_3_4.csv +14 -0
  83. pyogrio/gdal_data/grib2_table_4_2_3_5.csv +11 -0
  84. pyogrio/gdal_data/grib2_table_4_2_3_6.csv +11 -0
  85. pyogrio/gdal_data/grib2_table_4_2_4_0.csv +261 -0
  86. pyogrio/gdal_data/grib2_table_4_2_4_1.csv +261 -0
  87. pyogrio/gdal_data/grib2_table_4_2_4_10.csv +261 -0
  88. pyogrio/gdal_data/grib2_table_4_2_4_2.csv +261 -0
  89. pyogrio/gdal_data/grib2_table_4_2_4_3.csv +261 -0
  90. pyogrio/gdal_data/grib2_table_4_2_4_4.csv +261 -0
  91. pyogrio/gdal_data/grib2_table_4_2_4_5.csv +261 -0
  92. pyogrio/gdal_data/grib2_table_4_2_4_6.csv +261 -0
  93. pyogrio/gdal_data/grib2_table_4_2_4_7.csv +261 -0
  94. pyogrio/gdal_data/grib2_table_4_2_4_8.csv +261 -0
  95. pyogrio/gdal_data/grib2_table_4_2_4_9.csv +261 -0
  96. pyogrio/gdal_data/grib2_table_4_2_local_Canada.csv +5 -0
  97. pyogrio/gdal_data/grib2_table_4_2_local_HPC.csv +2 -0
  98. pyogrio/gdal_data/grib2_table_4_2_local_MRMS.csv +175 -0
  99. pyogrio/gdal_data/grib2_table_4_2_local_NCEP.csv +401 -0
  100. pyogrio/gdal_data/grib2_table_4_2_local_NDFD.csv +38 -0
  101. pyogrio/gdal_data/grib2_table_4_2_local_index.csv +7 -0
  102. pyogrio/gdal_data/grib2_table_4_5.csv +261 -0
  103. pyogrio/gdal_data/grib2_table_versions.csv +3 -0
  104. pyogrio/gdal_data/gt_datum.csv +229 -0
  105. pyogrio/gdal_data/gt_ellips.csv +24 -0
  106. pyogrio/gdal_data/header.dxf +1124 -0
  107. pyogrio/gdal_data/inspire_cp_BasicPropertyUnit.gfs +57 -0
  108. pyogrio/gdal_data/inspire_cp_CadastralBoundary.gfs +60 -0
  109. pyogrio/gdal_data/inspire_cp_CadastralParcel.gfs +81 -0
  110. pyogrio/gdal_data/inspire_cp_CadastralZoning.gfs +161 -0
  111. pyogrio/gdal_data/jpfgdgml_AdmArea.gfs +59 -0
  112. pyogrio/gdal_data/jpfgdgml_AdmBdry.gfs +49 -0
  113. pyogrio/gdal_data/jpfgdgml_AdmPt.gfs +59 -0
  114. pyogrio/gdal_data/jpfgdgml_BldA.gfs +54 -0
  115. pyogrio/gdal_data/jpfgdgml_BldL.gfs +54 -0
  116. pyogrio/gdal_data/jpfgdgml_Cntr.gfs +54 -0
  117. pyogrio/gdal_data/jpfgdgml_CommBdry.gfs +49 -0
  118. pyogrio/gdal_data/jpfgdgml_CommPt.gfs +59 -0
  119. pyogrio/gdal_data/jpfgdgml_Cstline.gfs +54 -0
  120. pyogrio/gdal_data/jpfgdgml_ElevPt.gfs +54 -0
  121. pyogrio/gdal_data/jpfgdgml_GCP.gfs +94 -0
  122. pyogrio/gdal_data/jpfgdgml_LeveeEdge.gfs +49 -0
  123. pyogrio/gdal_data/jpfgdgml_RailCL.gfs +54 -0
  124. pyogrio/gdal_data/jpfgdgml_RdASL.gfs +44 -0
  125. pyogrio/gdal_data/jpfgdgml_RdArea.gfs +54 -0
  126. pyogrio/gdal_data/jpfgdgml_RdCompt.gfs +59 -0
  127. pyogrio/gdal_data/jpfgdgml_RdEdg.gfs +59 -0
  128. pyogrio/gdal_data/jpfgdgml_RdMgtBdry.gfs +49 -0
  129. pyogrio/gdal_data/jpfgdgml_RdSgmtA.gfs +59 -0
  130. pyogrio/gdal_data/jpfgdgml_RvrMgtBdry.gfs +49 -0
  131. pyogrio/gdal_data/jpfgdgml_SBAPt.gfs +49 -0
  132. pyogrio/gdal_data/jpfgdgml_SBArea.gfs +54 -0
  133. pyogrio/gdal_data/jpfgdgml_SBBdry.gfs +44 -0
  134. pyogrio/gdal_data/jpfgdgml_WA.gfs +54 -0
  135. pyogrio/gdal_data/jpfgdgml_WL.gfs +54 -0
  136. pyogrio/gdal_data/jpfgdgml_WStrA.gfs +54 -0
  137. pyogrio/gdal_data/jpfgdgml_WStrL.gfs +54 -0
  138. pyogrio/gdal_data/leaflet_template.html +102 -0
  139. pyogrio/gdal_data/nitf_spec.xml +3288 -0
  140. pyogrio/gdal_data/nitf_spec.xsd +171 -0
  141. pyogrio/gdal_data/ogr_fields_override.schema.json +125 -0
  142. pyogrio/gdal_data/ogrinfo_output.schema.json +528 -0
  143. pyogrio/gdal_data/ogrvrt.xsd +528 -0
  144. pyogrio/gdal_data/osmconf.ini +134 -0
  145. pyogrio/gdal_data/ozi_datum.csv +131 -0
  146. pyogrio/gdal_data/ozi_ellips.csv +35 -0
  147. pyogrio/gdal_data/pci_datum.txt +530 -0
  148. pyogrio/gdal_data/pci_ellips.txt +129 -0
  149. pyogrio/gdal_data/pdfcomposition.xsd +703 -0
  150. pyogrio/gdal_data/pds4_template.xml +65 -0
  151. pyogrio/gdal_data/plscenesconf.json +1985 -0
  152. pyogrio/gdal_data/ruian_vf_ob_v1.gfs +1455 -0
  153. pyogrio/gdal_data/ruian_vf_st_uvoh_v1.gfs +86 -0
  154. pyogrio/gdal_data/ruian_vf_st_v1.gfs +1489 -0
  155. pyogrio/gdal_data/ruian_vf_v1.gfs +2126 -0
  156. pyogrio/gdal_data/s57agencies.csv +249 -0
  157. pyogrio/gdal_data/s57attributes.csv +484 -0
  158. pyogrio/gdal_data/s57expectedinput.csv +1008 -0
  159. pyogrio/gdal_data/s57objectclasses.csv +287 -0
  160. pyogrio/gdal_data/seed_2d.dgn +0 -0
  161. pyogrio/gdal_data/seed_3d.dgn +0 -0
  162. pyogrio/gdal_data/stateplane.csv +259 -0
  163. pyogrio/gdal_data/template_tiles.mapml +28 -0
  164. pyogrio/gdal_data/tms_LINZAntarticaMapTileGrid.json +190 -0
  165. pyogrio/gdal_data/tms_MapML_APSTILE.json +268 -0
  166. pyogrio/gdal_data/tms_MapML_CBMTILE.json +346 -0
  167. pyogrio/gdal_data/tms_NZTM2000.json +243 -0
  168. pyogrio/gdal_data/trailer.dxf +434 -0
  169. pyogrio/gdal_data/usage +4 -0
  170. pyogrio/gdal_data/vcpkg-cmake-wrapper.cmake +23 -0
  171. pyogrio/gdal_data/vcpkg.spdx.json +291 -0
  172. pyogrio/gdal_data/vcpkg_abi_info.txt +45 -0
  173. pyogrio/gdal_data/vdv452.xml +349 -0
  174. pyogrio/gdal_data/vdv452.xsd +45 -0
  175. pyogrio/gdal_data/vicar.json +164 -0
  176. pyogrio/geopandas.py +978 -0
  177. pyogrio/proj_data/CH +22 -0
  178. pyogrio/proj_data/GL27 +23 -0
  179. pyogrio/proj_data/ITRF2000 +24 -0
  180. pyogrio/proj_data/ITRF2008 +94 -0
  181. pyogrio/proj_data/ITRF2014 +55 -0
  182. pyogrio/proj_data/ITRF2020 +91 -0
  183. pyogrio/proj_data/copyright +34 -0
  184. pyogrio/proj_data/deformation_model.schema.json +582 -0
  185. pyogrio/proj_data/nad.lst +142 -0
  186. pyogrio/proj_data/nad27 +810 -0
  187. pyogrio/proj_data/nad83 +745 -0
  188. pyogrio/proj_data/other.extra +53 -0
  189. pyogrio/proj_data/proj-config-version.cmake +44 -0
  190. pyogrio/proj_data/proj-config.cmake +79 -0
  191. pyogrio/proj_data/proj-targets-release.cmake +19 -0
  192. pyogrio/proj_data/proj-targets.cmake +107 -0
  193. pyogrio/proj_data/proj.db +0 -0
  194. pyogrio/proj_data/proj.ini +59 -0
  195. pyogrio/proj_data/proj4-targets-release.cmake +19 -0
  196. pyogrio/proj_data/proj4-targets.cmake +107 -0
  197. pyogrio/proj_data/projjson.schema.json +1174 -0
  198. pyogrio/proj_data/triangulation.schema.json +214 -0
  199. pyogrio/proj_data/usage +9 -0
  200. pyogrio/proj_data/vcpkg.spdx.json +203 -0
  201. pyogrio/proj_data/vcpkg_abi_info.txt +28 -0
  202. pyogrio/proj_data/world +214 -0
  203. pyogrio/raw.py +897 -0
  204. pyogrio/tests/__init__.py +0 -0
  205. pyogrio/tests/conftest.py +588 -0
  206. pyogrio/tests/fixtures/README.md +108 -0
  207. pyogrio/tests/fixtures/curve.gpkg +0 -0
  208. pyogrio/tests/fixtures/curvepolygon.gpkg +0 -0
  209. pyogrio/tests/fixtures/line_zm.gpkg +0 -0
  210. pyogrio/tests/fixtures/list_field_values_file.parquet +0 -0
  211. pyogrio/tests/fixtures/list_nested_struct_file.parquet +0 -0
  212. pyogrio/tests/fixtures/multisurface.gpkg +0 -0
  213. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.cpg +1 -0
  214. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf +0 -0
  215. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.prj +1 -0
  216. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp +0 -0
  217. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx +0 -0
  218. pyogrio/tests/fixtures/sample.osm.pbf +0 -0
  219. pyogrio/tests/fixtures/test_gpkg_nulls.gpkg +0 -0
  220. pyogrio/tests/test_arrow.py +1160 -0
  221. pyogrio/tests/test_core.py +702 -0
  222. pyogrio/tests/test_geopandas_io.py +3218 -0
  223. pyogrio/tests/test_path.py +374 -0
  224. pyogrio/tests/test_raw_io.py +1473 -0
  225. pyogrio/tests/test_util.py +56 -0
  226. pyogrio/util.py +258 -0
  227. pyogrio-0.12.0.dist-info/METADATA +125 -0
  228. pyogrio-0.12.0.dist-info/RECORD +231 -0
  229. pyogrio-0.12.0.dist-info/WHEEL +6 -0
  230. pyogrio-0.12.0.dist-info/licenses/LICENSE +21 -0
  231. pyogrio-0.12.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,3218 @@
1
+ import contextlib
2
+ import locale
3
+ import os
4
+ import re
5
+ import warnings
6
+ from datetime import datetime
7
+ from io import BytesIO
8
+ from zipfile import ZipFile
9
+
10
+ import numpy as np
11
+
12
+ from pyogrio import (
13
+ __gdal_version__,
14
+ list_drivers,
15
+ list_layers,
16
+ read_info,
17
+ set_gdal_config_options,
18
+ vsi_listtree,
19
+ vsi_unlink,
20
+ )
21
+ from pyogrio._compat import (
22
+ GDAL_GE_37,
23
+ GDAL_GE_311,
24
+ HAS_ARROW_WRITE_API,
25
+ HAS_PYPROJ,
26
+ PANDAS_GE_15,
27
+ PANDAS_GE_23,
28
+ PANDAS_GE_30,
29
+ SHAPELY_GE_21,
30
+ )
31
+ from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
32
+ from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe
33
+ from pyogrio.raw import (
34
+ DRIVERS_NO_MIXED_DIMENSIONS,
35
+ DRIVERS_NO_MIXED_SINGLE_MULTI,
36
+ )
37
+ from pyogrio.tests.conftest import (
38
+ ALL_EXTS,
39
+ DRIVERS,
40
+ GDAL_HAS_PARQUET_DRIVER,
41
+ START_FID,
42
+ requires_arrow_write_api,
43
+ requires_gdal_geos,
44
+ requires_pyarrow_api,
45
+ requires_pyproj,
46
+ )
47
+
48
+ import pytest
49
+
50
+ try:
51
+ import geopandas as gp
52
+ import pandas as pd
53
+ from geopandas.array import from_wkt
54
+ from pandas.api.types import is_datetime64_dtype, is_object_dtype, is_string_dtype
55
+
56
+ import shapely # if geopandas is present, shapely is expected to be present
57
+ from shapely.geometry import Point
58
+
59
+ from geopandas.testing import assert_geodataframe_equal
60
+ from pandas.testing import (
61
+ assert_index_equal,
62
+ assert_series_equal,
63
+ )
64
+
65
+ except ImportError:
66
+ pass
67
+
68
+
69
+ pytest.importorskip("geopandas")
70
+
71
+
72
+ @pytest.fixture(
73
+ scope="session",
74
+ params=[
75
+ False,
76
+ pytest.param(True, marks=requires_pyarrow_api),
77
+ ],
78
+ )
79
+ def use_arrow(request):
80
+ return request.param
81
+
82
+
83
+ @pytest.fixture(autouse=True)
84
+ def skip_if_no_arrow_write_api(request):
85
+ # Automatically skip tests that use use_arrow=True and require the Arrow write
86
+ # API (marked with `@pytest.mark.requires_arrow_write_api`) if it is not available.
87
+ use_arrow = (
88
+ request.getfixturevalue("use_arrow")
89
+ if "use_arrow" in request.fixturenames
90
+ else False
91
+ )
92
+ if (
93
+ use_arrow
94
+ and not HAS_ARROW_WRITE_API
95
+ and request.node.get_closest_marker("requires_arrow_write_api")
96
+ ):
97
+ pytest.skip("GDAL>=3.8 required for Arrow write API")
98
+
99
+
100
+ @contextlib.contextmanager
101
+ def use_arrow_context():
102
+ original = os.environ.get("PYOGRIO_USE_ARROW", None)
103
+ os.environ["PYOGRIO_USE_ARROW"] = "1"
104
+ yield
105
+ if original:
106
+ os.environ["PYOGRIO_USE_ARROW"] = original
107
+ else:
108
+ del os.environ["PYOGRIO_USE_ARROW"]
109
+
110
+
111
+ def test_spatialite_available(test_gpkg_nulls):
112
+ """Check if SpatiaLite is available by running a simple SQL query."""
113
+ _ = read_dataframe(
114
+ test_gpkg_nulls, sql="select spatialite_version();", sql_dialect="SQLITE"
115
+ )
116
+
117
+
118
+ @pytest.mark.parametrize(
119
+ "encoding, arrow",
120
+ [
121
+ ("utf-8", False),
122
+ pytest.param("utf-8", True, marks=requires_pyarrow_api),
123
+ ("cp1252", False),
124
+ (None, False),
125
+ ],
126
+ )
127
+ def test_read_csv_encoding(tmp_path, encoding, arrow):
128
+ """ "Test reading CSV files with different encodings.
129
+
130
+ Arrow only supports utf-8 encoding.
131
+ """
132
+ # Write a CSV test file. Depending on the OS, this will be written in a different
133
+ # encoding: UTF-8 for Linux and macOS, cp1252 for Windows.
134
+ csv_path = tmp_path / "test.csv"
135
+ with open(csv_path, "w", encoding=encoding) as csv:
136
+ csv.write("näme,city\n")
137
+ csv.write("Wilhelm Röntgen,Zürich\n")
138
+
139
+ # Read csv. The data should be read with the same default encoding as the csv file
140
+ # was written in, but should have been converted to utf-8 in the dataframe returned.
141
+ # Hence, the asserts below, which compare against utf-8 strings, should pass.
142
+ df = read_dataframe(csv_path, encoding=encoding, use_arrow=arrow)
143
+
144
+ assert len(df) == 1
145
+ assert df.columns.tolist() == ["näme", "city"]
146
+ assert df.city.tolist() == ["Zürich"]
147
+ assert df.näme.tolist() == ["Wilhelm Röntgen"]
148
+
149
+
150
+ @pytest.mark.skipif(
151
+ locale.getpreferredencoding().upper() == "UTF-8",
152
+ reason="test requires non-UTF-8 default platform",
153
+ )
154
+ def test_read_csv_platform_encoding(tmp_path, use_arrow):
155
+ """Verify that read defaults to platform encoding; only works on Windows (CP1252).
156
+
157
+ When use_arrow=True, reading a non-UTF-8 file fails.
158
+ """
159
+ csv_path = tmp_path / "test.csv"
160
+ with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
161
+ csv.write("näme,city\n")
162
+ csv.write("Wilhelm Röntgen,Zürich\n")
163
+
164
+ if use_arrow:
165
+ with pytest.raises(
166
+ DataSourceError,
167
+ match="; please use_arrow=False",
168
+ ):
169
+ df = read_dataframe(csv_path, use_arrow=use_arrow)
170
+ else:
171
+ df = read_dataframe(csv_path, use_arrow=use_arrow)
172
+
173
+ assert len(df) == 1
174
+ assert df.columns.tolist() == ["näme", "city"]
175
+ assert df.city.tolist() == ["Zürich"]
176
+ assert df.näme.tolist() == ["Wilhelm Röntgen"]
177
+
178
+
179
+ def test_read_dataframe(naturalearth_lowres_all_ext):
180
+ df = read_dataframe(naturalearth_lowres_all_ext)
181
+
182
+ if HAS_PYPROJ:
183
+ assert df.crs == "EPSG:4326"
184
+ assert len(df) == 177
185
+ assert df.columns.tolist() == [
186
+ "pop_est",
187
+ "continent",
188
+ "name",
189
+ "iso_a3",
190
+ "gdp_md_est",
191
+ "geometry",
192
+ ]
193
+
194
+
195
+ def test_read_dataframe_vsi(naturalearth_lowres_vsi, use_arrow):
196
+ df = read_dataframe(naturalearth_lowres_vsi[1], use_arrow=use_arrow)
197
+ assert len(df) == 177
198
+
199
+
200
+ @pytest.mark.parametrize(
201
+ "columns, fid_as_index, exp_len", [(None, False, 3), ([], True, 3), ([], False, 0)]
202
+ )
203
+ def test_read_layer_without_geometry(
204
+ no_geometry_file, columns, fid_as_index, use_arrow, exp_len
205
+ ):
206
+ result = read_dataframe(
207
+ no_geometry_file,
208
+ columns=columns,
209
+ fid_as_index=fid_as_index,
210
+ use_arrow=use_arrow,
211
+ )
212
+ assert type(result) is pd.DataFrame
213
+ assert len(result) == exp_len
214
+
215
+
216
+ @pytest.mark.parametrize(
217
+ "naturalearth_lowres, expected_ext",
218
+ [(".gpkg", ".gpkg"), (".shp", ".shp")],
219
+ indirect=["naturalearth_lowres"],
220
+ )
221
+ def test_fixture_naturalearth_lowres(naturalearth_lowres, expected_ext):
222
+ # Test the fixture with the "indirect" parameter
223
+ assert naturalearth_lowres.suffix == expected_ext
224
+ df = read_dataframe(naturalearth_lowres)
225
+ assert len(df) == 177
226
+
227
+
228
+ def test_read_no_geometry(naturalearth_lowres_all_ext, use_arrow):
229
+ df = read_dataframe(
230
+ naturalearth_lowres_all_ext, use_arrow=use_arrow, read_geometry=False
231
+ )
232
+ assert isinstance(df, pd.DataFrame)
233
+ assert not isinstance(df, gp.GeoDataFrame)
234
+
235
+
236
+ def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres, use_arrow):
237
+ with pytest.raises(
238
+ ValueError,
239
+ match=(
240
+ "at least one of read_geometry or return_fids must be True or columns must "
241
+ "be None or non-empty"
242
+ ),
243
+ ):
244
+ _ = read_dataframe(
245
+ naturalearth_lowres,
246
+ columns=[],
247
+ read_geometry=False,
248
+ fid_as_index=False,
249
+ use_arrow=use_arrow,
250
+ )
251
+
252
+
253
+ def test_read_force_2d(tmp_path, use_arrow):
254
+ filename = tmp_path / "test.gpkg"
255
+
256
+ # create a GPKG with 3D point values
257
+ expected = gp.GeoDataFrame(
258
+ geometry=[Point(0, 0, 0), Point(1, 1, 0)], crs="EPSG:4326"
259
+ )
260
+ write_dataframe(expected, filename)
261
+
262
+ df = read_dataframe(filename)
263
+ assert df.iloc[0].geometry.has_z
264
+
265
+ df = read_dataframe(
266
+ filename,
267
+ force_2d=True,
268
+ max_features=1,
269
+ use_arrow=use_arrow,
270
+ )
271
+ assert not df.iloc[0].geometry.has_z
272
+
273
+
274
+ def test_read_geojson_error(naturalearth_lowres_geojson, use_arrow):
275
+ try:
276
+ set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01})
277
+ with pytest.raises(
278
+ DataSourceError,
279
+ match="Failed to read GeoJSON data; .* GeoJSON object too complex",
280
+ ):
281
+ read_dataframe(naturalearth_lowres_geojson, use_arrow=use_arrow)
282
+ finally:
283
+ set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": None})
284
+
285
+
286
+ @pytest.mark.skipif(
287
+ "LIBKML" not in list_drivers(),
288
+ reason="LIBKML driver is not available and is needed to read simpledata element",
289
+ )
290
+ def test_read_kml_simpledata(kml_file, use_arrow):
291
+ """Test reading a KML file with a simpledata element.
292
+
293
+ Simpledata elements are only read by the LibKML driver, not the KML driver.
294
+ """
295
+ gdf = read_dataframe(kml_file, use_arrow=use_arrow)
296
+
297
+ # Check if the simpledata column is present.
298
+ assert "formation" in gdf.columns
299
+ assert gdf["formation"].iloc[0] == "Ton"
300
+
301
+
302
+ def test_read_layer(tmp_path, use_arrow):
303
+ filename = tmp_path / "test.gpkg"
304
+
305
+ # create a multilayer GPKG
306
+ expected1 = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
307
+ if use_arrow:
308
+ # TODO: this needs to be fixed on the geopandas side (to ensure the
309
+ # GeoDataFrame() constructor does this); when use_arrow=True we already
310
+ # get a columns Index with string dtype
311
+ expected1.columns = expected1.columns.astype("str")
312
+ write_dataframe(
313
+ expected1,
314
+ filename,
315
+ layer="layer1",
316
+ )
317
+
318
+ expected2 = gp.GeoDataFrame(geometry=[Point(1, 1)], crs="EPSG:4326")
319
+ if use_arrow:
320
+ expected2.columns = expected2.columns.astype("str")
321
+ write_dataframe(expected2, filename, layer="layer2", append=True)
322
+
323
+ assert np.array_equal(
324
+ list_layers(filename), [["layer1", "Point"], ["layer2", "Point"]]
325
+ )
326
+
327
+ kwargs = {"use_arrow": use_arrow, "max_features": 1}
328
+
329
+ # The first layer is read by default, which will warn when there are multiple
330
+ # layers
331
+ with pytest.warns(UserWarning, match="More than one layer found"):
332
+ df = read_dataframe(filename, **kwargs)
333
+
334
+ assert_geodataframe_equal(df, expected1)
335
+
336
+ # Reading a specific layer by name should return that layer.
337
+ # Detected here by a known column.
338
+ df = read_dataframe(filename, layer="layer2", **kwargs)
339
+ assert_geodataframe_equal(df, expected2)
340
+
341
+ # Reading a specific layer by index should return that layer
342
+ df = read_dataframe(filename, layer=1, **kwargs)
343
+ assert_geodataframe_equal(df, expected2)
344
+
345
+
346
+ def test_read_layer_invalid(naturalearth_lowres_all_ext, use_arrow):
347
+ with pytest.raises(DataLayerError, match="Layer 'wrong' could not be opened"):
348
+ read_dataframe(naturalearth_lowres_all_ext, layer="wrong", use_arrow=use_arrow)
349
+
350
+
351
+ def test_read_datetime(datetime_file, use_arrow):
352
+ df = read_dataframe(datetime_file, use_arrow=use_arrow)
353
+ if PANDAS_GE_20:
354
+ # starting with pandas 2.0, it preserves the passed datetime resolution
355
+ assert df.col.dtype.name == "datetime64[ms]"
356
+ else:
357
+ assert df.col.dtype.name == "datetime64[ns]"
358
+
359
+
360
+ def test_read_list_types(list_field_values_files, use_arrow):
361
+ """Test reading a geojson file containing fields with lists."""
362
+ if list_field_values_files.suffix == ".parquet" and not GDAL_HAS_PARQUET_DRIVER:
363
+ pytest.skip(
364
+ "Skipping test for parquet as the GDAL Parquet driver is not available"
365
+ )
366
+
367
+ info = read_info(list_field_values_files)
368
+ suffix = list_field_values_files.suffix
369
+
370
+ result = read_dataframe(list_field_values_files, use_arrow=use_arrow)
371
+
372
+ # Check list_int column
373
+ assert "list_int" in result.columns
374
+ assert info["fields"][1] == "list_int"
375
+ assert info["ogr_types"][1] in ("OFTIntegerList", "OFTInteger64List")
376
+ assert result["list_int"][0].tolist() == [0, 1]
377
+ assert result["list_int"][1].tolist() == [2, 3]
378
+ assert result["list_int"][2].tolist() == []
379
+ assert result["list_int"][3] is None
380
+ assert result["list_int"][4] is None
381
+
382
+ # Check list_double column
383
+ assert "list_double" in result.columns
384
+ assert info["fields"][2] == "list_double"
385
+ assert info["ogr_types"][2] == "OFTRealList"
386
+ assert result["list_double"][0].tolist() == [0.0, 1.0]
387
+ assert result["list_double"][1].tolist() == [2.0, 3.0]
388
+ assert result["list_double"][2].tolist() == []
389
+ assert result["list_double"][3] is None
390
+ assert result["list_double"][4] is None
391
+
392
+ # Check list_string column
393
+ assert "list_string" in result.columns
394
+ assert info["fields"][3] == "list_string"
395
+ assert info["ogr_types"][3] == "OFTStringList"
396
+ assert result["list_string"][0].tolist() == ["string1", "string2"]
397
+ assert result["list_string"][1].tolist() == ["string3", "string4", ""]
398
+ assert result["list_string"][2].tolist() == []
399
+ assert result["list_string"][3] is None
400
+ assert result["list_string"][4] == [""]
401
+
402
+ # Check list_int_with_null column
403
+ if suffix == ".geojson":
404
+ # Once any row of a column contains a null value in a list, the column isn't
405
+ # recognized as a list column anymore for .geojson files, but as a JSON column.
406
+ # Because JSON columns containing JSON Arrays are also parsed to python lists,
407
+ # the end result is the same...
408
+ exp_type = "OFTString"
409
+ exp_subtype = "OFSTJSON"
410
+ exp_list_int_with_null_value = [0, None]
411
+ else:
412
+ # For .parquet files, the list column is preserved as a list column.
413
+ exp_type = "OFTInteger64List"
414
+ exp_subtype = "OFSTNone"
415
+ if use_arrow:
416
+ exp_list_int_with_null_value = [0.0, np.nan]
417
+ else:
418
+ exp_list_int_with_null_value = [0, 0]
419
+ # xfail: when reading a list of int with None values without Arrow from a
420
+ # .parquet file, the None values become 0, which is wrong.
421
+ # https://github.com/OSGeo/gdal/issues/13448
422
+
423
+ assert "list_int_with_null" in result.columns
424
+ assert info["fields"][4] == "list_int_with_null"
425
+ assert info["ogr_types"][4] == exp_type
426
+ assert info["ogr_subtypes"][4] == exp_subtype
427
+ assert result["list_int_with_null"][0][0] == 0
428
+ if exp_list_int_with_null_value[1] == 0:
429
+ assert result["list_int_with_null"][0][1] == exp_list_int_with_null_value[1]
430
+ else:
431
+ assert pd.isna(result["list_int_with_null"][0][1])
432
+
433
+ if suffix == ".geojson":
434
+ # For .geojson, the lists are already python lists
435
+ assert result["list_int_with_null"][1] == [2, 3]
436
+ assert result["list_int_with_null"][2] == []
437
+ else:
438
+ # For .parquet, the lists are numpy arrays
439
+ assert result["list_int_with_null"][1].tolist() == [2, 3]
440
+ assert result["list_int_with_null"][2].tolist() == []
441
+
442
+ assert pd.isna(result["list_int_with_null"][3])
443
+ assert pd.isna(result["list_int_with_null"][4])
444
+
445
+ # Check list_string_with_null column
446
+ if suffix == ".geojson":
447
+ # Once any row of a column contains a null value in a list, the column isn't
448
+ # recognized as a list column anymore for .geojson files, but as a JSON column.
449
+ # Because JSON columns containing JSON Arrays are also parsed to python lists,
450
+ # the end result is the same...
451
+ exp_type = "OFTString"
452
+ exp_subtype = "OFSTJSON"
453
+ else:
454
+ # For .parquet files, the list column is preserved as a list column.
455
+ exp_type = "OFTStringList"
456
+ exp_subtype = "OFSTNone"
457
+
458
+ assert "list_string_with_null" in result.columns
459
+ assert info["fields"][5] == "list_string_with_null"
460
+ assert info["ogr_types"][5] == exp_type
461
+ assert info["ogr_subtypes"][5] == exp_subtype
462
+
463
+ if suffix == ".geojson":
464
+ # For .geojson, the lists are already python lists
465
+ assert result["list_string_with_null"][0] == ["string1", None]
466
+ assert result["list_string_with_null"][1] == ["string3", "string4", ""]
467
+ assert result["list_string_with_null"][2] == []
468
+ else:
469
+ # For .parquet, the lists are numpy arrays
470
+ # When use_arrow=False, the None becomes an empty string, which is wrong.
471
+ exp_value = ["string1", ""] if not use_arrow else ["string1", None]
472
+ assert result["list_string_with_null"][0].tolist() == exp_value
473
+ assert result["list_string_with_null"][1].tolist() == ["string3", "string4", ""]
474
+ assert result["list_string_with_null"][2].tolist() == []
475
+
476
+ assert pd.isna(result["list_string_with_null"][3])
477
+ assert result["list_string_with_null"][4] == [""]
478
+
479
+
480
+ @pytest.mark.requires_arrow_write_api
481
+ @pytest.mark.skipif(
482
+ not GDAL_HAS_PARQUET_DRIVER, reason="Parquet driver is not available"
483
+ )
484
+ def test_read_list_nested_struct_parquet_file(
485
+ list_nested_struct_parquet_file, use_arrow
486
+ ):
487
+ """Test reading a Parquet file containing nested struct and list types."""
488
+ if not use_arrow:
489
+ pytest.skip(
490
+ "When use_arrow=False, gdal flattens nested columns to seperate columns. "
491
+ "Not sure how we want to deal with this case, but for now just skip."
492
+ )
493
+
494
+ result = read_dataframe(list_nested_struct_parquet_file, use_arrow=use_arrow)
495
+
496
+ assert "col_flat" in result.columns
497
+ assert np.array_equal(result["col_flat"].to_numpy(), np.array([0, 1, 2]))
498
+
499
+ assert "col_list" in result.columns
500
+ assert result["col_list"].dtype == object
501
+ assert result["col_list"][0].tolist() == [1, 2, 3]
502
+ assert result["col_list"][1].tolist() == [1, 2, 3]
503
+ assert result["col_list"][2].tolist() == [1, 2, 3]
504
+
505
+ assert "col_nested" in result.columns
506
+ assert result["col_nested"].dtype == object
507
+ assert result["col_nested"][0].tolist() == [{"a": 1, "b": 2}, {"a": 1, "b": 2}]
508
+ assert result["col_nested"][1].tolist() == [{"a": 1, "b": 2}, {"a": 1, "b": 2}]
509
+ assert result["col_nested"][2].tolist() == [{"a": 1, "b": 2}, {"a": 1, "b": 2}]
510
+
511
+ assert "col_struct" in result.columns
512
+ assert result["col_struct"].dtype == object
513
+ assert result["col_struct"][0] == {"a": 1, "b": 2}
514
+ assert result["col_struct"][1] == {"a": 1, "b": 2}
515
+ assert result["col_struct"][2] == {"a": 1, "b": 2}
516
+
517
+
518
+ @pytest.mark.filterwarnings(
519
+ "ignore: Non-conformant content for record 1 in column dates"
520
+ )
521
+ @pytest.mark.requires_arrow_write_api
522
+ def test_write_datetime_mixed_offset(tmp_path, use_arrow):
523
+ # Australian Summer Time AEDT (GMT+11), Standard Time AEST (GMT+10)
524
+ dates = ["2023-01-01 11:00:01.111", "2023-06-01 10:00:01.111"]
525
+ naive_col = pd.Series(pd.to_datetime(dates), name="dates")
526
+ localised_col = naive_col.dt.tz_localize("Australia/Sydney")
527
+ utc_col = localised_col.dt.tz_convert("UTC")
528
+ if PANDAS_GE_20:
529
+ utc_col = utc_col.dt.as_unit("ms")
530
+
531
+
532
+ @pytest.mark.parametrize("datetime_as_string", [False, True])
533
+ @pytest.mark.parametrize("mixed_offsets_as_utc", [False, True])
534
+ def test_read_datetime_long_ago(
535
+ geojson_datetime_long_ago, use_arrow, mixed_offsets_as_utc, datetime_as_string
536
+ ):
537
+ """Test writing/reading a column with a datetime far in the past.
538
+ Dates from before 1678-1-1 aren't parsed correctly by pandas < 3.0, so they
539
+ stay strings.
540
+ Reported in https://github.com/geopandas/pyogrio/issues/553.
541
+ """
542
+ handler = contextlib.nullcontext()
543
+ overflow_occured = False
544
+ if not datetime_as_string and not PANDAS_GE_30 and (not use_arrow or GDAL_GE_311):
545
+ # When datetimes should not be returned as string and arrow is not used or
546
+ # arrow is used with GDAL >= 3.11, `pandas.to_datetime` is used to parse the
547
+ # datetimes. However, when using pandas < 3.0, this raises an
548
+ # "Out of bounds nanosecond timestamp" error for very old dates.
549
+ # As a result, `read_dataframe` gives a warning and the datetimes stay strings.
550
+ handler = pytest.warns(
551
+ UserWarning, match="Error parsing datetimes, original strings are returned"
552
+ )
553
+ overflow_occured = True
554
+ # XFAIL: datetimes before 1678-1-1 give overflow with arrow=False and pandas<3.0
555
+ elif use_arrow and not PANDAS_GE_20 and not GDAL_GE_311:
556
+ # When arrow is used with pandas < 2.0 and GDAL < 3.11, an overflow occurs in
557
+ # pyarrow.to_pandas().
558
+ handler = pytest.raises(
559
+ Exception,
560
+ match=re.escape("Casting from timestamp[ms] to timestamp[ns] would result"),
561
+ )
562
+ overflow_occured = True
563
+ # XFAIL: datetimes before 1678-1-1 give overflow with arrow=True and pandas<2.0
564
+
565
+ with handler:
566
+ df = read_dataframe(
567
+ geojson_datetime_long_ago,
568
+ use_arrow=use_arrow,
569
+ datetime_as_string=datetime_as_string,
570
+ mixed_offsets_as_utc=mixed_offsets_as_utc,
571
+ )
572
+
573
+ exp_dates_str = pd.Series(["1670-01-01T09:00:00"], name="datetime_col")
574
+ if datetime_as_string:
575
+ assert is_string_dtype(df.datetime_col.dtype)
576
+ assert_series_equal(df.datetime_col, exp_dates_str)
577
+ else:
578
+ # It is a single naive datetime, so regardless of mixed_offsets_as_utc the
579
+ # expected "ideal" result is the same: a datetime64 without time zone info.
580
+ if overflow_occured:
581
+ # Strings are returned because of an overflow.
582
+ assert is_string_dtype(df.datetime_col.dtype)
583
+ assert_series_equal(df.datetime_col, exp_dates_str)
584
+ else:
585
+ # With use_arrow or pandas >= 3.0, old datetimes are parsed correctly.
586
+ assert is_datetime64_dtype(df.datetime_col)
587
+ assert df.datetime_col.iloc[0] == pd.Timestamp(1670, 1, 1, 9, 0, 0)
588
+ assert df.datetime_col.iloc[0].unit == "ms"
589
+
590
+
591
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext != ".shp"])
592
+ @pytest.mark.parametrize("datetime_as_string", [False, True])
593
+ @pytest.mark.parametrize("mixed_offsets_as_utc", [False, True])
594
+ @pytest.mark.requires_arrow_write_api
595
+ def test_write_read_datetime_no_tz(
596
+ tmp_path, ext, datetime_as_string, mixed_offsets_as_utc, use_arrow
597
+ ):
598
+ """Test writing/reading a column with naive datetimes (no time zone information)."""
599
+ dates_raw = ["2020-01-01T09:00:00.123", "2020-01-01T10:00:00", np.nan]
600
+ if PANDAS_GE_20:
601
+ dates = pd.to_datetime(dates_raw, format="ISO8601").as_unit("ms")
602
+ else:
603
+ dates = pd.to_datetime(dates_raw)
604
+ df = gp.GeoDataFrame(
605
+ {"dates": dates, "geometry": [Point(1, 1)] * 3}, crs="EPSG:4326"
606
+ )
607
+
608
+ fpath = tmp_path / f"test{ext}"
609
+ write_dataframe(df, fpath, use_arrow=use_arrow)
610
+ result = read_dataframe(
611
+ fpath,
612
+ use_arrow=use_arrow,
613
+ datetime_as_string=datetime_as_string,
614
+ mixed_offsets_as_utc=mixed_offsets_as_utc,
615
+ )
616
+
617
+ if use_arrow and ext == ".gpkg" and __gdal_version__ < (3, 11, 0):
618
+ # With GDAL < 3.11 with arrow, columns with naive datetimes are written
619
+ # correctly, but when read they are wrongly interpreted as being in UTC.
620
+ # The reason is complicated, but more info can be found e.g. here:
621
+ # https://github.com/geopandas/pyogrio/issues/487#issuecomment-2423762807
622
+ exp_dates = df.dates.dt.tz_localize("UTC")
623
+ if datetime_as_string:
624
+ exp_dates = exp_dates.astype("str").str.replace(" ", "T")
625
+ exp_dates[2] = np.nan
626
+ assert_series_equal(result.dates, exp_dates)
627
+ elif not mixed_offsets_as_utc:
628
+ assert_series_equal(result.dates, exp_dates)
629
+ # XFAIL: naive datetimes read wrong in GPKG with GDAL < 3.11 via arrow
630
+
631
+ elif datetime_as_string:
632
+ assert is_string_dtype(result.dates.dtype)
633
+ if use_arrow and __gdal_version__ < (3, 11, 0):
634
+ dates_str = df.dates.astype("str").str.replace(" ", "T")
635
+ dates_str[2] = np.nan
636
+ else:
637
+ dates_str = pd.Series(dates_raw, name="dates")
638
+ assert_series_equal(result.dates, dates_str)
639
+ else:
640
+ assert is_datetime64_dtype(result.dates.dtype)
641
+ assert_geodataframe_equal(result, df)
642
+
643
+
644
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext != ".shp"])
645
+ @pytest.mark.parametrize("datetime_as_string", [False, True])
646
+ @pytest.mark.parametrize("mixed_offsets_as_utc", [False, True])
647
+ @pytest.mark.filterwarnings("ignore: Non-conformant content for record 1 in column ")
648
+ @pytest.mark.requires_arrow_write_api
649
+ def test_write_read_datetime_tz(
650
+ request, tmp_path, ext, datetime_as_string, mixed_offsets_as_utc, use_arrow
651
+ ):
652
+ """Write and read file with all equal time zones.
653
+
654
+ This should result in a pandas datetime64 dtype column being returned.
655
+ """
656
+ if use_arrow and __gdal_version__ < (3, 10, 0) and ext in (".geojson", ".geojsonl"):
657
+ # With GDAL < 3.10 with arrow, the time zone offset was applied to the datetime
658
+ # as well as retaining the time zone.
659
+ # This was fixed in https://github.com/OSGeo/gdal/pull/11049
660
+ request.node.add_marker(
661
+ pytest.mark.xfail(
662
+ reason="Wrong datetimes read in GeoJSON with GDAL < 3.10 via arrow"
663
+ )
664
+ )
665
+
666
+ dates_raw = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00", np.nan]
667
+ if PANDAS_GE_20:
668
+ dates = pd.to_datetime(dates_raw, format="ISO8601").as_unit("ms")
669
+ else:
670
+ dates = pd.to_datetime(dates_raw)
671
+
672
+ # Make the index non-consecutive to test this case as well. Added for issue
673
+ # https://github.com/geopandas/pyogrio/issues/324
674
+ df = gp.GeoDataFrame(
675
+ {"dates": dates, "geometry": [Point(1, 1)] * 3},
676
+ index=[0, 2, 3],
677
+ crs="EPSG:4326",
678
+ )
679
+ assert isinstance(df.dates.dtype, pd.DatetimeTZDtype)
680
+
681
+ fpath = tmp_path / f"test{ext}"
682
+ write_dataframe(df, fpath, use_arrow=use_arrow)
683
+ result = read_dataframe(
684
+ fpath,
685
+ use_arrow=use_arrow,
686
+ datetime_as_string=datetime_as_string,
687
+ mixed_offsets_as_utc=mixed_offsets_as_utc,
688
+ )
689
+
690
+ # With some older versions, the offset is represented slightly differently
691
+ if result.dates.dtype.name.endswith(", pytz.FixedOffset(-300)]"):
692
+ result.dates = result.dates.astype(df.dates.dtype)
693
+
694
+ if use_arrow and ext in (".fgb", ".gpkg") and __gdal_version__ < (3, 11, 0):
695
+ # With GDAL < 3.11 with arrow, datetime columns are written as string type
696
+ df_exp = df.copy()
697
+ df_exp.dates = df_exp[df_exp.dates.notna()].dates.astype(str)
698
+ assert_series_equal(result.dates, df_exp.dates, check_index=False)
699
+ # XFAIL: datetime columns written as string with GDAL < 3.11 via arrow
700
+ elif datetime_as_string:
701
+ assert is_string_dtype(result.dates.dtype)
702
+ if use_arrow and __gdal_version__ < (3, 11, 0):
703
+ dates_str = df.dates.astype("str").str.replace(" ", "T")
704
+ dates_str.iloc[2] = np.nan
705
+ elif __gdal_version__ < (3, 7, 0):
706
+ # With GDAL < 3.7, time zone minutes aren't included in the string
707
+ dates_str = [x[:-3] for x in dates_raw if pd.notna(x)] + [np.nan]
708
+ dates_str = pd.Series(dates_str, name="dates")
709
+ else:
710
+ dates_str = pd.Series(dates_raw, name="dates")
711
+ assert_series_equal(result.dates, dates_str, check_index=False)
712
+ else:
713
+ assert_series_equal(result.dates, df.dates, check_index=False)
714
+
715
+
716
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext != ".shp"])
717
+ @pytest.mark.parametrize("datetime_as_string", [False, True])
718
+ @pytest.mark.parametrize("mixed_offsets_as_utc", [False, True])
719
+ @pytest.mark.filterwarnings(
720
+ "ignore: Non-conformant content for record 1 in column dates"
721
+ )
722
+ @pytest.mark.requires_arrow_write_api
723
+ def test_write_read_datetime_tz_localized_mixed_offset(
724
+ tmp_path, ext, datetime_as_string, mixed_offsets_as_utc, use_arrow
725
+ ):
726
+ """Test with localized dates across a different summer/winter time zone offset."""
727
+ # Australian Summer Time AEDT (GMT+11), Standard Time AEST (GMT+10)
728
+ dates_raw = ["2023-01-01 11:00:01.111", "2023-06-01 10:00:01.111", np.nan]
729
+ dates_naive = pd.Series(pd.to_datetime(dates_raw), name="dates")
730
+ dates_local = dates_naive.dt.tz_localize("Australia/Sydney")
731
+ dates_local_offsets_str = dates_local.astype(str)
732
+ if datetime_as_string:
733
+ exp_dates = dates_local_offsets_str.str.replace(" ", "T")
734
+ exp_dates = exp_dates.str.replace(".111000", ".111")
735
+ if __gdal_version__ < (3, 7, 0):
736
+ # With GDAL < 3.7, time zone minutes aren't included in the string
737
+ exp_dates = exp_dates.str.slice(0, -3)
738
+ elif mixed_offsets_as_utc:
739
+ exp_dates = dates_local.dt.tz_convert("UTC")
740
+ if PANDAS_GE_20:
741
+ exp_dates = exp_dates.dt.as_unit("ms")
742
+ else:
743
+ exp_dates = dates_local_offsets_str.apply(
744
+ lambda x: pd.Timestamp(x) if pd.notna(x) else None
745
+ )
746
+
747
+ df = gp.GeoDataFrame(
748
+ {"dates": dates_local, "geometry": [Point(1, 1)] * 3}, crs="EPSG:4326"
749
+ )
750
+ fpath = tmp_path / f"test{ext}"
751
+ write_dataframe(df, fpath, use_arrow=use_arrow)
752
+ result = read_dataframe(
753
+ fpath,
754
+ use_arrow=use_arrow,
755
+ datetime_as_string=datetime_as_string,
756
+ mixed_offsets_as_utc=mixed_offsets_as_utc,
757
+ )
758
+
759
+ if use_arrow and __gdal_version__ < (3, 11, 0):
760
+ if ext in (".geojson", ".geojsonl"):
761
+ # With GDAL < 3.11 with arrow, GDAL converts mixed time zone datetimes to
762
+ # UTC when read, as the arrow datetime column type does not support mixed tz.
763
+ dates_utc = dates_local.dt.tz_convert("UTC")
764
+ if PANDAS_GE_20:
765
+ dates_utc = dates_utc.dt.as_unit("ms")
766
+ if datetime_as_string:
767
+ assert is_string_dtype(result.dates.dtype)
768
+ dates_utc = dates_utc.astype(str).str.replace(" ", "T")
769
+ assert pd.isna(result.dates[2])
770
+ assert_series_equal(result.dates.head(2), dates_utc.head(2))
771
+ # XFAIL: mixed tz datetimes converted to UTC with GDAL < 3.11 + arrow
772
+ return
773
+
774
+ elif ext in (".gpkg", ".fgb"):
775
+ # With GDAL < 3.11 with arrow, datetime columns are written as string type
776
+ assert pd.isna(result.dates[2])
777
+ assert_series_equal(result.dates.head(2), dates_local_offsets_str.head(2))
778
+ # XFAIL: datetime columns written as string with GDAL < 3.11 + arrow
779
+ return
780
+
781
+ # GDAL tz only encodes offsets, not time zones
782
+ if datetime_as_string:
783
+ assert is_string_dtype(result.dates.dtype)
784
+ elif mixed_offsets_as_utc:
785
+ assert isinstance(result.dates.dtype, pd.DatetimeTZDtype)
786
+ else:
787
+ assert is_object_dtype(result.dates.dtype)
788
+
789
+ # Check isna for the third value separately as depending on versions this is
790
+ # different, and pandas 3.0 assert_series_equal becomes strict about this.
791
+ assert pd.isna(result.dates[2])
792
+ assert_series_equal(result.dates.head(2), exp_dates.head(2))
793
+
794
+
795
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext != ".shp"])
796
+ @pytest.mark.parametrize("datetime_as_string", [False, True])
797
+ @pytest.mark.parametrize("mixed_offsets_as_utc", [False, True])
798
+ @pytest.mark.filterwarnings(
799
+ "ignore: Non-conformant content for record 1 in column dates"
800
+ )
801
+ @pytest.mark.requires_arrow_write_api
802
+ def test_write_read_datetime_tz_mixed_offsets(
803
+ tmp_path, ext, datetime_as_string, mixed_offsets_as_utc, use_arrow
804
+ ):
805
+ """Test with dates with mixed time zone offsets."""
806
+ # The pandas datetime64 column type doesn't support mixed time zone offsets, so
807
+ # it needs to be a list of pandas.Timestamp objects instead.
808
+ dates = [
809
+ pd.Timestamp("2023-01-01 11:00:01.111+01:00"),
810
+ pd.Timestamp("2023-06-01 10:00:01.111+05:00"),
811
+ np.nan,
812
+ ]
813
+
814
+ df = gp.GeoDataFrame(
815
+ {"dates": dates, "geometry": [Point(1, 1)] * 3}, crs="EPSG:4326"
816
+ )
817
+ fpath = tmp_path / f"test{ext}"
818
+ write_dataframe(df, fpath, use_arrow=use_arrow)
819
+ result = read_dataframe(
820
+ fpath,
821
+ use_arrow=use_arrow,
822
+ datetime_as_string=datetime_as_string,
823
+ mixed_offsets_as_utc=mixed_offsets_as_utc,
824
+ )
825
+
826
+ if use_arrow and __gdal_version__ < (3, 11, 0):
827
+ if ext in (".geojson", ".geojsonl"):
828
+ # With GDAL < 3.11 with arrow, GDAL converts mixed time zone datetimes to
829
+ # UTC when read, as the arrow datetime column type does not support mixed tz.
830
+ df_exp = df.copy()
831
+ df_exp.dates = pd.to_datetime(dates, utc=True)
832
+ if PANDAS_GE_20:
833
+ df_exp.dates = df_exp.dates.dt.as_unit("ms")
834
+ if datetime_as_string:
835
+ df_exp.dates = df_exp.dates.astype("str").str.replace(" ", "T")
836
+ df_exp.loc[2, "dates"] = pd.NA
837
+ assert_geodataframe_equal(result, df_exp)
838
+ # XFAIL: mixed tz datetimes converted to UTC with GDAL < 3.11 + arrow
839
+ return
840
+
841
+ elif ext in (".gpkg", ".fgb"):
842
+ # With arrow and GDAL < 3.11, mixed time zone datetimes are written as
843
+ # string type columns, so no proper roundtrip is possible.
844
+ df_exp = df.copy()
845
+ df_exp.dates = df_exp.dates.astype("string").astype("O")
846
+ assert_geodataframe_equal(result, df_exp)
847
+ # XFAIL: mixed tz datetimes converted to UTC with GDAL < 3.11 + arrow
848
+ return
849
+
850
+ if datetime_as_string:
851
+ assert is_string_dtype(result.dates.dtype)
852
+ dates_str = df.dates.map(
853
+ lambda x: x.isoformat(timespec="milliseconds") if pd.notna(x) else np.nan
854
+ )
855
+ if __gdal_version__ < (3, 7, 0):
856
+ # With GDAL < 3.7, time zone minutes aren't included in the string
857
+ dates_str = dates_str.str.slice(0, -3)
858
+ assert_series_equal(result.dates, dates_str)
859
+ elif mixed_offsets_as_utc:
860
+ assert isinstance(result.dates.dtype, pd.DatetimeTZDtype)
861
+ exp_dates = pd.to_datetime(df.dates, utc=True)
862
+ if PANDAS_GE_20:
863
+ exp_dates = exp_dates.dt.as_unit("ms")
864
+ assert_series_equal(result.dates, exp_dates)
865
+ else:
866
+ assert is_object_dtype(result.dates.dtype)
867
+ assert_geodataframe_equal(result, df)
868
+
869
+
870
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext != ".shp"])
871
+ @pytest.mark.parametrize(
872
+ "dates_raw",
873
+ [
874
+ (
875
+ pd.Timestamp("2020-01-01T09:00:00.123-05:00"),
876
+ pd.Timestamp("2020-01-01T10:00:00-05:00"),
877
+ np.nan,
878
+ ),
879
+ (
880
+ datetime.fromisoformat("2020-01-01T09:00:00.123-05:00"),
881
+ datetime.fromisoformat("2020-01-01T10:00:00-05:00"),
882
+ np.nan,
883
+ ),
884
+ ],
885
+ )
886
+ @pytest.mark.parametrize("datetime_as_string", [False, True])
887
+ @pytest.mark.parametrize("mixed_offsets_as_utc", [False, True])
888
+ @pytest.mark.filterwarnings(
889
+ "ignore: Non-conformant content for record 1 in column dates"
890
+ )
891
+ @pytest.mark.requires_arrow_write_api
892
+ def test_write_read_datetime_tz_objects(
893
+ tmp_path, dates_raw, ext, use_arrow, datetime_as_string, mixed_offsets_as_utc
894
+ ):
895
+ """Datetime objects with equal offsets are read as datetime64."""
896
+ dates = pd.Series(dates_raw, dtype="O")
897
+ df = gp.GeoDataFrame(
898
+ {"dates": dates, "geometry": [Point(1, 1)] * 3}, crs="EPSG:4326"
899
+ )
900
+
901
+ fpath = tmp_path / f"test{ext}"
902
+ write_dataframe(df, fpath, use_arrow=use_arrow)
903
+ result = read_dataframe(
904
+ fpath,
905
+ use_arrow=use_arrow,
906
+ datetime_as_string=datetime_as_string,
907
+ mixed_offsets_as_utc=mixed_offsets_as_utc,
908
+ )
909
+
910
+ # Check result
911
+ if PANDAS_GE_20:
912
+ exp_dates = pd.to_datetime(dates_raw, format="ISO8601").as_unit("ms")
913
+ else:
914
+ exp_dates = pd.to_datetime(dates_raw)
915
+ exp_df = df.copy()
916
+ exp_df["dates"] = pd.Series(exp_dates, name="dates")
917
+
918
+ # With some older versions, the offset is represented slightly differently
919
+ if result.dates.dtype.name.endswith(", pytz.FixedOffset(-300)]"):
920
+ result["dates"] = result.dates.astype(exp_df.dates.dtype)
921
+
922
+ if use_arrow and __gdal_version__ < (3, 10, 0) and ext in (".geojson", ".geojsonl"):
923
+ # XFAIL: Wrong datetimes read in GeoJSON with GDAL < 3.10 via arrow.
924
+ # The time zone offset was applied to the datetime as well as retaining
925
+ # the time zone. This was fixed in https://github.com/OSGeo/gdal/pull/11049
926
+
927
+ # Subtract 5 hours from the expected datetimes to match the wrong result.
928
+ if datetime_as_string:
929
+ exp_df["dates"] = pd.Series(
930
+ [
931
+ "2020-01-01T04:00:00.123000-05:00",
932
+ "2020-01-01T05:00:00-05:00",
933
+ np.nan,
934
+ ]
935
+ )
936
+ else:
937
+ exp_df["dates"] = exp_df.dates - pd.Timedelta(hours=5)
938
+ if PANDAS_GE_20:
939
+ # The unit needs to be applied again apparently
940
+ exp_df["dates"] = exp_df.dates.dt.as_unit("ms")
941
+ assert_geodataframe_equal(result, exp_df)
942
+ return
943
+
944
+ if use_arrow and __gdal_version__ < (3, 11, 0) and ext in (".fgb", ".gpkg"):
945
+ # XFAIL: datetime columns are written as string with GDAL < 3.11 + arrow
946
+ # -> custom formatting because the df column is object dtype and thus
947
+ # astype(str) converted the datetime objects one by one
948
+ exp_df["dates"] = pd.Series(
949
+ ["2020-01-01 09:00:00.123000-05:00", "2020-01-01 10:00:00-05:00", np.nan]
950
+ )
951
+ assert_geodataframe_equal(result, exp_df)
952
+ return
953
+
954
+ if datetime_as_string:
955
+ assert is_string_dtype(result.dates.dtype)
956
+ if use_arrow and __gdal_version__ < (3, 11, 0):
957
+ # With GDAL < 3.11 with arrow, datetime columns are written as string type
958
+ exp_df["dates"] = pd.Series(
959
+ [
960
+ "2020-01-01T09:00:00.123000-05:00",
961
+ "2020-01-01T10:00:00-05:00",
962
+ np.nan,
963
+ ]
964
+ )
965
+ else:
966
+ exp_df["dates"] = pd.Series(
967
+ ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00", np.nan]
968
+ )
969
+ if __gdal_version__ < (3, 7, 0):
970
+ # With GDAL < 3.7, time zone minutes aren't included in the string
971
+ exp_df["dates"] = exp_df.dates.str.slice(0, -3)
972
+ elif mixed_offsets_as_utc:
973
+ # the offsets are all -05:00, so the result retains the offset and not UTC
974
+ assert isinstance(result.dates.dtype, pd.DatetimeTZDtype)
975
+ assert str(result.dates.dtype.tz) in ("UTC-05:00", "pytz.FixedOffset(-300)")
976
+ else:
977
+ assert isinstance(result.dates.dtype, pd.DatetimeTZDtype)
978
+
979
+ assert_geodataframe_equal(result, exp_df)
980
+
981
+
982
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext != ".shp"])
983
+ @pytest.mark.parametrize("datetime_as_string", [False, True])
984
+ @pytest.mark.parametrize("mixed_offsets_as_utc", [False, True])
985
+ @pytest.mark.requires_arrow_write_api
986
+ def test_write_read_datetime_utc(
987
+ tmp_path, ext, use_arrow, datetime_as_string, mixed_offsets_as_utc
988
+ ):
989
+ """Test writing/reading a column with UTC datetimes."""
990
+ dates_raw = ["2020-01-01T09:00:00.123Z", "2020-01-01T10:00:00Z", np.nan]
991
+ if PANDAS_GE_20:
992
+ dates = pd.to_datetime(dates_raw, format="ISO8601").as_unit("ms")
993
+ else:
994
+ dates = pd.to_datetime(dates_raw)
995
+ df = gp.GeoDataFrame(
996
+ {"dates": dates, "geometry": [Point(1, 1)] * 3}, crs="EPSG:4326"
997
+ )
998
+ assert df.dates.dtype.name in ("datetime64[ms, UTC]", "datetime64[ns, UTC]")
999
+
1000
+ fpath = tmp_path / f"test{ext}"
1001
+ write_dataframe(df, fpath, use_arrow=use_arrow)
1002
+ result = read_dataframe(
1003
+ fpath,
1004
+ use_arrow=use_arrow,
1005
+ datetime_as_string=datetime_as_string,
1006
+ mixed_offsets_as_utc=mixed_offsets_as_utc,
1007
+ )
1008
+
1009
+ if use_arrow and ext == ".fgb" and __gdal_version__ < (3, 11, 0):
1010
+ # With GDAL < 3.11 with arrow, time zone information is dropped when reading
1011
+ # .fgb
1012
+ if datetime_as_string:
1013
+ assert is_string_dtype(result.dates.dtype)
1014
+ dates_str = pd.Series(
1015
+ ["2020-01-01T09:00:00.123", "2020-01-01T10:00:00.000", np.nan],
1016
+ name="dates",
1017
+ )
1018
+ assert_series_equal(result.dates, dates_str)
1019
+ else:
1020
+ assert_series_equal(result.dates, df.dates.dt.tz_localize(None))
1021
+ # XFAIL: UTC datetimes read wrong in .fgb with GDAL < 3.11 via arrow
1022
+ elif datetime_as_string:
1023
+ assert is_string_dtype(result.dates.dtype)
1024
+ if use_arrow and __gdal_version__ < (3, 11, 0):
1025
+ dates_str = df.dates.astype("str").str.replace(" ", "T")
1026
+ dates_str[2] = np.nan
1027
+ else:
1028
+ dates_str = pd.Series(dates_raw, name="dates")
1029
+ if __gdal_version__ < (3, 7, 0):
1030
+ # With GDAL < 3.7, datetime ends with +00 for UTC, not Z
1031
+ dates_str = dates_str.str.replace("Z", "+00")
1032
+ assert_series_equal(result.dates, dates_str)
1033
+ else:
1034
+ assert result.dates.dtype.name in ("datetime64[ms, UTC]", "datetime64[ns, UTC]")
1035
+ assert_geodataframe_equal(result, df)
1036
+
1037
+
1038
+ def test_read_null_values(tmp_path, use_arrow):
1039
+ filename = tmp_path / "test_null_values_no_geometry.gpkg"
1040
+
1041
+ # create a GPKG with no geometries and only null values
1042
+ expected = pd.DataFrame({"col": [None, None]})
1043
+ write_dataframe(expected, filename)
1044
+
1045
+ df = read_dataframe(filename, use_arrow=use_arrow, read_geometry=False)
1046
+
1047
+ # make sure that Null values are preserved
1048
+ assert df["col"].isna().all()
1049
+
1050
+
1051
+ def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow):
1052
+ kwargs = {"use_arrow": use_arrow, "skip_features": 2, "max_features": 2}
1053
+
1054
+ # default is to not set FIDs as index
1055
+ df = read_dataframe(naturalearth_lowres_all_ext, **kwargs)
1056
+ assert_index_equal(df.index, pd.RangeIndex(0, 2))
1057
+
1058
+ df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=False, **kwargs)
1059
+ assert_index_equal(df.index, pd.RangeIndex(0, 2))
1060
+
1061
+ df = read_dataframe(
1062
+ naturalearth_lowres_all_ext,
1063
+ fid_as_index=True,
1064
+ **kwargs,
1065
+ )
1066
+ fids_expected = pd.Index([2, 3], name="fid")
1067
+ fids_expected += START_FID[naturalearth_lowres_all_ext.suffix]
1068
+ assert_index_equal(df.index, fids_expected)
1069
+
1070
+
1071
+ def test_read_fid_as_index_only(naturalearth_lowres, use_arrow):
1072
+ df = read_dataframe(
1073
+ naturalearth_lowres,
1074
+ columns=[],
1075
+ read_geometry=False,
1076
+ fid_as_index=True,
1077
+ use_arrow=use_arrow,
1078
+ )
1079
+ assert df is not None
1080
+ assert len(df) == 177
1081
+ assert len(df.columns) == 0
1082
+
1083
+
1084
+ def test_read_where(naturalearth_lowres_all_ext, use_arrow):
1085
+ # empty filter should return full set of records
1086
+ df = read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, where="")
1087
+ assert len(df) == 177
1088
+
1089
+ # should return singular item
1090
+ df = read_dataframe(
1091
+ naturalearth_lowres_all_ext, use_arrow=use_arrow, where="iso_a3 = 'CAN'"
1092
+ )
1093
+ assert len(df) == 1
1094
+ assert df.iloc[0].iso_a3 == "CAN"
1095
+
1096
+ df = read_dataframe(
1097
+ naturalearth_lowres_all_ext,
1098
+ use_arrow=use_arrow,
1099
+ where="iso_a3 IN ('CAN', 'USA', 'MEX')",
1100
+ )
1101
+ assert len(df) == 3
1102
+ assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0
1103
+
1104
+ # should return items within range
1105
+ df = read_dataframe(
1106
+ naturalearth_lowres_all_ext,
1107
+ use_arrow=use_arrow,
1108
+ where="POP_EST >= 10000000 AND POP_EST < 100000000",
1109
+ )
1110
+ assert len(df) == 75
1111
+ assert df.pop_est.min() >= 10000000
1112
+ assert df.pop_est.max() < 100000000
1113
+
1114
+ # should match no items
1115
+ df = read_dataframe(
1116
+ naturalearth_lowres_all_ext, use_arrow=use_arrow, where="ISO_A3 = 'INVALID'"
1117
+ )
1118
+ assert len(df) == 0
1119
+
1120
+
1121
+ def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
1122
+ if use_arrow and naturalearth_lowres_all_ext.suffix == ".gpkg":
1123
+ # https://github.com/OSGeo/gdal/issues/8492
1124
+ request.node.add_marker(pytest.mark.xfail(reason="GDAL doesn't error for GPGK"))
1125
+
1126
+ if naturalearth_lowres_all_ext.suffix == ".gpkg" and __gdal_version__ >= (3, 11, 0):
1127
+ with pytest.raises(DataLayerError, match="no such column"):
1128
+ read_dataframe(
1129
+ naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
1130
+ )
1131
+ else:
1132
+ with pytest.raises(ValueError, match="Invalid SQL"):
1133
+ read_dataframe(
1134
+ naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
1135
+ )
1136
+
1137
+
1138
+ def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
1139
+ # column included in where is not also included in list of columns, which means
1140
+ # GDAL will return no features
1141
+ # NOTE: this behavior is inconsistent across drivers so only shapefiles are
1142
+ # tested for this
1143
+ df = read_dataframe(
1144
+ naturalearth_lowres,
1145
+ where=""" "iso_a3" = 'CAN' """,
1146
+ columns=["name"],
1147
+ use_arrow=use_arrow,
1148
+ )
1149
+
1150
+ assert len(df) == 0
1151
+
1152
+
1153
+ @pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
1154
+ def test_read_bbox_invalid(naturalearth_lowres_all_ext, bbox, use_arrow):
1155
+ with pytest.raises(ValueError, match="Invalid bbox"):
1156
+ read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, bbox=bbox)
1157
+
1158
+
1159
+ @pytest.mark.parametrize(
1160
+ "bbox,expected",
1161
+ [
1162
+ ((0, 0, 0.00001, 0.00001), []),
1163
+ ((-85, 8, -80, 10), ["PAN", "CRI"]),
1164
+ ((-104, 54, -105, 55), ["CAN"]),
1165
+ ],
1166
+ )
1167
+ def test_read_bbox(naturalearth_lowres_all_ext, use_arrow, bbox, expected):
1168
+ if (
1169
+ use_arrow
1170
+ and __gdal_version__ < (3, 8, 0)
1171
+ and naturalearth_lowres_all_ext.suffix == ".gpkg"
1172
+ ):
1173
+ pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")
1174
+
1175
+ df = read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, bbox=bbox)
1176
+
1177
+ assert np.array_equal(df.iso_a3, expected)
1178
+
1179
+
1180
+ def test_read_bbox_sql(naturalearth_lowres_all_ext, use_arrow):
1181
+ df = read_dataframe(
1182
+ naturalearth_lowres_all_ext,
1183
+ use_arrow=use_arrow,
1184
+ bbox=(-180, 50, -100, 90),
1185
+ sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
1186
+ )
1187
+ assert len(df) == 1
1188
+ assert np.array_equal(df.iso_a3, ["CAN"])
1189
+
1190
+
1191
+ def test_read_bbox_where(naturalearth_lowres_all_ext, use_arrow):
1192
+ df = read_dataframe(
1193
+ naturalearth_lowres_all_ext,
1194
+ use_arrow=use_arrow,
1195
+ bbox=(-180, 50, -100, 90),
1196
+ where="iso_a3 not in ('USA', 'RUS')",
1197
+ )
1198
+ assert len(df) == 1
1199
+ assert np.array_equal(df.iso_a3, ["CAN"])
1200
+
1201
+
1202
+ @pytest.mark.parametrize(
1203
+ "mask",
1204
+ [
1205
+ {"type": "Point", "coordinates": [0, 0]},
1206
+ '{"type": "Point", "coordinates": [0, 0]}',
1207
+ "invalid",
1208
+ ],
1209
+ )
1210
+ def test_read_mask_invalid(naturalearth_lowres, use_arrow, mask):
1211
+ with pytest.raises(ValueError, match="'mask' parameter must be a Shapely geometry"):
1212
+ read_dataframe(naturalearth_lowres, use_arrow=use_arrow, mask=mask)
1213
+
1214
+
1215
+ def test_read_bbox_mask_invalid(naturalearth_lowres, use_arrow):
1216
+ with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
1217
+ read_dataframe(
1218
+ naturalearth_lowres,
1219
+ use_arrow=use_arrow,
1220
+ bbox=(-85, 8, -80, 10),
1221
+ mask=shapely.Point(-105, 55),
1222
+ )
1223
+
1224
+
1225
+ @pytest.mark.parametrize(
1226
+ "mask,expected",
1227
+ [
1228
+ (shapely.Point(-105, 55), ["CAN"]),
1229
+ (shapely.box(-85, 8, -80, 10), ["PAN", "CRI"]),
1230
+ (
1231
+ shapely.Polygon(
1232
+ (
1233
+ [6.101929483362767, 50.97085041206964],
1234
+ [5.773001596839322, 50.90661120482673],
1235
+ [5.593156133704326, 50.642648747710325],
1236
+ [6.059271089606312, 50.686051894002475],
1237
+ [6.374064065737485, 50.851481340346965],
1238
+ [6.101929483362767, 50.97085041206964],
1239
+ )
1240
+ ),
1241
+ ["DEU", "BEL", "NLD"],
1242
+ ),
1243
+ (
1244
+ shapely.GeometryCollection(
1245
+ [shapely.Point(-7.7, 53), shapely.box(-85, 8, -80, 10)]
1246
+ ),
1247
+ ["PAN", "CRI", "IRL"],
1248
+ ),
1249
+ ],
1250
+ )
1251
+ def test_read_mask(
1252
+ naturalearth_lowres_all_ext,
1253
+ use_arrow,
1254
+ mask,
1255
+ expected,
1256
+ ):
1257
+ if (
1258
+ use_arrow
1259
+ and __gdal_version__ < (3, 8, 0)
1260
+ and naturalearth_lowres_all_ext.suffix == ".gpkg"
1261
+ ):
1262
+ pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")
1263
+
1264
+ df = read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, mask=mask)
1265
+
1266
+ assert len(df) == len(expected)
1267
+ assert np.array_equal(df.iso_a3, expected)
1268
+
1269
+
1270
+ def test_read_mask_sql(naturalearth_lowres_all_ext, use_arrow):
1271
+ df = read_dataframe(
1272
+ naturalearth_lowres_all_ext,
1273
+ use_arrow=use_arrow,
1274
+ mask=shapely.box(-180, 50, -100, 90),
1275
+ sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
1276
+ )
1277
+ assert len(df) == 1
1278
+ assert np.array_equal(df.iso_a3, ["CAN"])
1279
+
1280
+
1281
+ def test_read_mask_where(naturalearth_lowres_all_ext, use_arrow):
1282
+ df = read_dataframe(
1283
+ naturalearth_lowres_all_ext,
1284
+ use_arrow=use_arrow,
1285
+ mask=shapely.box(-180, 50, -100, 90),
1286
+ where="iso_a3 not in ('USA', 'RUS')",
1287
+ )
1288
+ assert len(df) == 1
1289
+ assert np.array_equal(df.iso_a3, ["CAN"])
1290
+
1291
+
1292
+ @pytest.mark.parametrize("fids", [[1, 5, 10], np.array([1, 5, 10], dtype=np.int64)])
1293
+ def test_read_fids(naturalearth_lowres_all_ext, fids, use_arrow):
1294
+ # ensure keyword is properly passed through
1295
+ df = read_dataframe(
1296
+ naturalearth_lowres_all_ext, fids=fids, fid_as_index=True, use_arrow=use_arrow
1297
+ )
1298
+ assert len(df) == 3
1299
+ assert np.array_equal(fids, df.index.values)
1300
+
1301
+
1302
+ @requires_pyarrow_api
1303
+ def test_read_fids_arrow_max_exception(naturalearth_lowres):
1304
+ # Maximum number at time of writing is 4997 for "OGRSQL". E.g. for SQLite based
1305
+ # formats like Geopackage, there is no limit.
1306
+ nb_fids = 4998
1307
+ fids = range(nb_fids)
1308
+ with pytest.raises(ValueError, match=f"error applying filter for {nb_fids} fids"):
1309
+ _ = read_dataframe(naturalearth_lowres, fids=fids, use_arrow=True)
1310
+
1311
+
1312
+ @requires_pyarrow_api
1313
+ @pytest.mark.skipif(
1314
+ __gdal_version__ >= (3, 8, 0), reason="GDAL >= 3.8.0 does not need to warn"
1315
+ )
1316
+ def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext):
1317
+ # A warning should be given for old GDAL versions, except for some file formats.
1318
+ if naturalearth_lowres_all_ext.suffix not in [".gpkg", ".geojson"]:
1319
+ handler = pytest.warns(
1320
+ UserWarning,
1321
+ match="Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow",
1322
+ )
1323
+ else:
1324
+ handler = contextlib.nullcontext()
1325
+
1326
+ with handler:
1327
+ df = read_dataframe(naturalearth_lowres_all_ext, fids=[22], use_arrow=True)
1328
+ assert len(df) == 1
1329
+
1330
+
1331
+ def test_read_fids_force_2d(tmp_path):
1332
+ filename = tmp_path / "test.gpkg"
1333
+
1334
+ # create a GPKG with 3D point values
1335
+ expected = gp.GeoDataFrame(
1336
+ geometry=[Point(0, 0, 0), Point(1, 1, 0)], crs="EPSG:4326"
1337
+ )
1338
+ write_dataframe(expected, filename)
1339
+
1340
+ df = read_dataframe(filename, fids=[1])
1341
+ assert_geodataframe_equal(df, expected.iloc[:1])
1342
+
1343
+ df = read_dataframe(filename, force_2d=True, fids=[1])
1344
+ assert np.array_equal(
1345
+ df.geometry.values, shapely.force_2d(expected.iloc[:1].geometry.values)
1346
+ )
1347
+
1348
+
1349
+ @pytest.mark.parametrize("skip_features", [10, 200])
1350
+ def test_read_skip_features(naturalearth_lowres_all_ext, use_arrow, skip_features):
1351
+ ext = naturalearth_lowres_all_ext.suffix
1352
+ expected = (
1353
+ read_dataframe(naturalearth_lowres_all_ext)
1354
+ .iloc[skip_features:]
1355
+ .reset_index(drop=True)
1356
+ )
1357
+
1358
+ df = read_dataframe(
1359
+ naturalearth_lowres_all_ext, skip_features=skip_features, use_arrow=use_arrow
1360
+ )
1361
+ assert len(df) == len(expected)
1362
+
1363
+ # Coordinates are not precisely equal when written to JSON
1364
+ # dtypes do not necessarily round-trip precisely through JSON
1365
+ is_json = ext in [".geojson", ".geojsonl"]
1366
+ # In .geojsonl the vertices are reordered, so normalize
1367
+ is_jsons = ext == ".geojsonl"
1368
+
1369
+ if skip_features == 200 and not use_arrow:
1370
+ # result is an empty dataframe, so no proper dtype inference happens
1371
+ # for the numpy object dtype arrays
1372
+ df[["continent", "name", "iso_a3"]] = df[
1373
+ ["continent", "name", "iso_a3"]
1374
+ ].astype("str")
1375
+
1376
+ assert_geodataframe_equal(
1377
+ df,
1378
+ expected,
1379
+ check_less_precise=is_json,
1380
+ check_index_type=False,
1381
+ check_dtype=not is_json,
1382
+ normalize=is_jsons,
1383
+ )
1384
+
1385
+
1386
+ def test_read_negative_skip_features(naturalearth_lowres, use_arrow):
1387
+ with pytest.raises(ValueError, match="'skip_features' must be >= 0"):
1388
+ read_dataframe(naturalearth_lowres, skip_features=-1, use_arrow=use_arrow)
1389
+
1390
+
1391
+ @pytest.mark.parametrize("skip_features", [0, 10, 200])
1392
+ @pytest.mark.parametrize("max_features", [10, 100])
1393
+ def test_read_max_features(
1394
+ naturalearth_lowres_all_ext, use_arrow, max_features, skip_features
1395
+ ):
1396
+ ext = naturalearth_lowres_all_ext.suffix
1397
+ expected = (
1398
+ read_dataframe(naturalearth_lowres_all_ext)
1399
+ .iloc[skip_features : skip_features + max_features]
1400
+ .reset_index(drop=True)
1401
+ )
1402
+ df = read_dataframe(
1403
+ naturalearth_lowres_all_ext,
1404
+ skip_features=skip_features,
1405
+ max_features=max_features,
1406
+ use_arrow=use_arrow,
1407
+ )
1408
+
1409
+ assert len(df) == len(expected)
1410
+
1411
+ # Coordinates are not precisely equal when written to JSON
1412
+ # dtypes do not necessarily round-trip precisely through JSON
1413
+ is_json = ext in [".geojson", ".geojsonl"]
1414
+ # In .geojsonl the vertices are reordered, so normalize
1415
+ is_jsons = ext == ".geojsonl"
1416
+
1417
+ if len(expected) == 0 and not use_arrow:
1418
+ # for pandas >= 3, the column has string dtype but when reading an
1419
+ # empty result, it gets inferred as object dtype
1420
+ expected["continent"] = expected["continent"].astype("object")
1421
+ expected["name"] = expected["name"].astype("object")
1422
+ expected["iso_a3"] = expected["iso_a3"].astype("object")
1423
+
1424
+ assert_geodataframe_equal(
1425
+ df,
1426
+ expected,
1427
+ check_less_precise=is_json,
1428
+ check_index_type=False,
1429
+ check_dtype=not is_json,
1430
+ normalize=is_jsons,
1431
+ )
1432
+
1433
+
1434
+ def test_read_negative_max_features(naturalearth_lowres, use_arrow):
1435
+ with pytest.raises(ValueError, match="'max_features' must be >= 0"):
1436
+ read_dataframe(naturalearth_lowres, max_features=-1, use_arrow=use_arrow)
1437
+
1438
+
1439
+ def test_read_non_existent_file(use_arrow):
1440
+ # ensure consistent error type / message from GDAL
1441
+ with pytest.raises(DataSourceError, match="No such file or directory"):
1442
+ read_dataframe("non-existent.shp", use_arrow=use_arrow)
1443
+
1444
+ with pytest.raises(DataSourceError, match="does not exist in the file system"):
1445
+ read_dataframe("/vsizip/non-existent.zip", use_arrow=use_arrow)
1446
+
1447
+ with pytest.raises(DataSourceError, match="does not exist in the file system"):
1448
+ read_dataframe("zip:///non-existent.zip", use_arrow=use_arrow)
1449
+
1450
+
1451
+ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
1452
+ # The geometry column cannot be specified when using the
1453
+ # default OGRSQL dialect but is returned nonetheless, so 4 columns.
1454
+ sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
1455
+ df = read_dataframe(
1456
+ naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
1457
+ )
1458
+ assert len(df.columns) == 4
1459
+ assert len(df) == 177
1460
+
1461
+ # Should return single row
1462
+ sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
1463
+ df = read_dataframe(
1464
+ naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
1465
+ )
1466
+ assert len(df) == 1
1467
+ assert len(df.columns) == 6
1468
+ assert df.iloc[0].iso_a3 == "CAN"
1469
+
1470
+ sql = """SELECT *
1471
+ FROM naturalearth_lowres
1472
+ WHERE iso_a3 IN ('CAN', 'USA', 'MEX')"""
1473
+ df = read_dataframe(
1474
+ naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
1475
+ )
1476
+ assert len(df.columns) == 6
1477
+ assert len(df) == 3
1478
+ assert df.iso_a3.tolist() == ["CAN", "USA", "MEX"]
1479
+
1480
+ sql = """SELECT *
1481
+ FROM naturalearth_lowres
1482
+ WHERE iso_a3 IN ('CAN', 'USA', 'MEX')
1483
+ ORDER BY name"""
1484
+ df = read_dataframe(
1485
+ naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
1486
+ )
1487
+ assert len(df.columns) == 6
1488
+ assert len(df) == 3
1489
+ assert df.iso_a3.tolist() == ["CAN", "MEX", "USA"]
1490
+
1491
+ # Should return items within range.
1492
+ sql = """SELECT *
1493
+ FROM naturalearth_lowres
1494
+ WHERE POP_EST >= 10000000 AND POP_EST < 100000000"""
1495
+ df = read_dataframe(
1496
+ naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
1497
+ )
1498
+ assert len(df) == 75
1499
+ assert len(df.columns) == 6
1500
+ assert df.pop_est.min() >= 10000000
1501
+ assert df.pop_est.max() < 100000000
1502
+
1503
+ # Should match no items.
1504
+ sql = "SELECT * FROM naturalearth_lowres WHERE ISO_A3 = 'INVALID'"
1505
+ df = read_dataframe(
1506
+ naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
1507
+ )
1508
+ assert len(df) == 0
1509
+
1510
+
1511
+ def test_read_sql_invalid(naturalearth_lowres_all_ext, use_arrow):
1512
+ if naturalearth_lowres_all_ext.suffix == ".gpkg":
1513
+ with pytest.raises(Exception, match="In ExecuteSQL().*"):
1514
+ read_dataframe(
1515
+ naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
1516
+ )
1517
+ else:
1518
+ with pytest.raises(Exception, match="SQL Expression Parsing Error"):
1519
+ read_dataframe(
1520
+ naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
1521
+ )
1522
+
1523
+ with pytest.raises(
1524
+ ValueError, match="'sql' parameter cannot be combined with 'layer'"
1525
+ ):
1526
+ read_dataframe(
1527
+ naturalearth_lowres_all_ext,
1528
+ sql="whatever",
1529
+ layer="invalid",
1530
+ use_arrow=use_arrow,
1531
+ )
1532
+
1533
+
1534
+ def test_read_sql_columns_where(naturalearth_lowres_all_ext, use_arrow):
1535
+ sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
1536
+ df = read_dataframe(
1537
+ naturalearth_lowres_all_ext,
1538
+ sql=sql,
1539
+ sql_dialect="OGRSQL",
1540
+ columns=["iso_a3_renamed", "name"],
1541
+ where="iso_a3_renamed IN ('CAN', 'USA', 'MEX')",
1542
+ use_arrow=use_arrow,
1543
+ )
1544
+ assert len(df.columns) == 3
1545
+ assert len(df) == 3
1546
+ assert df.iso_a3_renamed.tolist() == ["CAN", "USA", "MEX"]
1547
+
1548
+
1549
+ def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext, use_arrow):
1550
+ sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
1551
+ df = read_dataframe(
1552
+ naturalearth_lowres_all_ext,
1553
+ sql=sql,
1554
+ sql_dialect="OGRSQL",
1555
+ columns=["iso_a3_renamed", "name"],
1556
+ where="iso_a3_renamed IN ('CRI', 'PAN')",
1557
+ bbox=(-85, 8, -80, 10),
1558
+ use_arrow=use_arrow,
1559
+ )
1560
+ assert len(df.columns) == 3
1561
+ assert len(df) == 2
1562
+ assert df.iso_a3_renamed.tolist() == ["PAN", "CRI"]
1563
+
1564
+
1565
+ def test_read_sql_skip_max(naturalearth_lowres_all_ext, use_arrow):
1566
+ sql = """SELECT *
1567
+ FROM naturalearth_lowres
1568
+ WHERE iso_a3 IN ('CAN', 'MEX', 'USA')
1569
+ ORDER BY name"""
1570
+ df = read_dataframe(
1571
+ naturalearth_lowres_all_ext,
1572
+ sql=sql,
1573
+ skip_features=1,
1574
+ max_features=1,
1575
+ sql_dialect="OGRSQL",
1576
+ use_arrow=use_arrow,
1577
+ )
1578
+ assert len(df.columns) == 6
1579
+ assert len(df) == 1
1580
+ assert df.iso_a3.tolist() == ["MEX"]
1581
+
1582
+ sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
1583
+ df = read_dataframe(
1584
+ naturalearth_lowres_all_ext,
1585
+ sql=sql,
1586
+ max_features=3,
1587
+ sql_dialect="OGRSQL",
1588
+ use_arrow=use_arrow,
1589
+ )
1590
+ assert len(df) == 1
1591
+
1592
+ sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
1593
+ df = read_dataframe(
1594
+ naturalearth_lowres_all_ext,
1595
+ sql=sql,
1596
+ sql_dialect="OGRSQL",
1597
+ skip_features=1,
1598
+ use_arrow=use_arrow,
1599
+ )
1600
+ assert len(df) == 0
1601
+
1602
+
1603
+ @requires_gdal_geos
1604
+ @pytest.mark.parametrize(
1605
+ "naturalearth_lowres",
1606
+ [ext for ext in ALL_EXTS if ext != ".gpkg"],
1607
+ indirect=["naturalearth_lowres"],
1608
+ )
1609
+ def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres, use_arrow):
1610
+ # Should return singular item
1611
+ sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
1612
+ df = read_dataframe(
1613
+ naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
1614
+ )
1615
+ assert len(df) == 1
1616
+ assert len(df.columns) == 6
1617
+ assert df.iloc[0].iso_a3 == "CAN"
1618
+ area_canada = df.iloc[0].geometry.area
1619
+
1620
+ # Use spatialite function
1621
+ sql = """SELECT ST_Buffer(geometry, 5) AS geometry, name, pop_est, iso_a3
1622
+ FROM naturalearth_lowres
1623
+ WHERE ISO_A3 = 'CAN'"""
1624
+ df = read_dataframe(
1625
+ naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
1626
+ )
1627
+ assert len(df) == 1
1628
+ assert len(df.columns) == 4
1629
+ assert df.iloc[0].geometry.area > area_canada
1630
+
1631
+
1632
+ @requires_gdal_geos
1633
+ @pytest.mark.parametrize(
1634
+ "naturalearth_lowres", [".gpkg"], indirect=["naturalearth_lowres"]
1635
+ )
1636
+ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
1637
+ # "INDIRECT_SQL" prohibits GDAL from passing the SQL statement to sqlite.
1638
+ # Because the statement is processed within GDAL it is possible to use
1639
+ # spatialite functions even if sqlite isn't built with spatialite support.
1640
+ sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
1641
+ df = read_dataframe(
1642
+ naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
1643
+ )
1644
+ assert len(df) == 1
1645
+ assert len(df.columns) == 6
1646
+ assert df.iloc[0].iso_a3 == "CAN"
1647
+ area_canada = df.iloc[0].geometry.area
1648
+
1649
+ # Use spatialite function
1650
+ sql = """SELECT ST_Buffer(geom, 5) AS geometry, name, pop_est, iso_a3
1651
+ FROM naturalearth_lowres
1652
+ WHERE ISO_A3 = 'CAN'"""
1653
+ df = read_dataframe(
1654
+ naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
1655
+ )
1656
+ assert len(df) == 1
1657
+ assert len(df.columns) == 4
1658
+ assert df.iloc[0].geometry.area > area_canada
1659
+
1660
+
1661
+ @pytest.mark.parametrize(
1662
+ "encoding, arrow",
1663
+ [
1664
+ ("utf-8", False),
1665
+ pytest.param("utf-8", True, marks=requires_arrow_write_api),
1666
+ ("cp1252", False),
1667
+ (None, False),
1668
+ ],
1669
+ )
1670
+ def test_write_csv_encoding(tmp_path, encoding, arrow):
1671
+ """Test if write_dataframe uses the default encoding correctly.
1672
+
1673
+ Arrow only supports utf-8 encoding.
1674
+ """
1675
+ # Write a csv test file. Depending on the OS this will be written in a different
1676
+ # encoding: for Linux and macOS this is utf-8, for Windows it is cp1252.
1677
+ csv_path = tmp_path / "test.csv"
1678
+
1679
+ with open(csv_path, "w", encoding=encoding) as csv:
1680
+ csv.write("näme,city\n")
1681
+ csv.write("Wilhelm Röntgen,Zürich\n")
1682
+
1683
+ # Write csv test file with the same data using write_dataframe. It should use the
1684
+ # same encoding as above.
1685
+ df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
1686
+ csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
1687
+ write_dataframe(df, csv_pyogrio_path, encoding=encoding, use_arrow=arrow)
1688
+
1689
+ # Check if the text files written both ways can be read again and give the same result.
1690
+ with open(csv_path, encoding=encoding) as csv:
1691
+ csv_str = csv.read()
1692
+ with open(csv_pyogrio_path, encoding=encoding) as csv_pyogrio:
1693
+ csv_pyogrio_str = csv_pyogrio.read()
1694
+ assert csv_str == csv_pyogrio_str
1695
+
1696
+ # Check if the files are binary identical, to be 100% sure they were written with
1697
+ # the same encoding.
1698
+ with open(csv_path, "rb") as csv:
1699
+ csv_bytes = csv.read()
1700
+ with open(csv_pyogrio_path, "rb") as csv_pyogrio:
1701
+ csv_pyogrio_bytes = csv_pyogrio.read()
1702
+ assert csv_bytes == csv_pyogrio_bytes
1703
+
1704
+
1705
+ @pytest.mark.parametrize(
1706
+ "ext, fid_column, fid_param_value",
1707
+ [
1708
+ (".gpkg", "fid", None),
1709
+ (".gpkg", "FID", None),
1710
+ (".sqlite", "ogc_fid", None),
1711
+ (".gpkg", "fid_custom", "fid_custom"),
1712
+ (".gpkg", "FID_custom", "fid_custom"),
1713
+ (".sqlite", "ogc_fid_custom", "ogc_fid_custom"),
1714
+ ],
1715
+ )
1716
+ @pytest.mark.requires_arrow_write_api
1717
+ def test_write_custom_fids(tmp_path, ext, fid_column, fid_param_value, use_arrow):
1718
+ """Test to specify FIDs to save when writing to a file.
1719
+
1720
+ Saving custom FIDs is only supported for formats that actually store the FID,
1721
+ e.g. GPKG and SQLite. The fid_column name check is case-insensitive.
1722
+
1723
+ Typically, GDAL supports using a custom FID column for these file formats via a
1724
+ `FID` layer creation option, which is also tested here. If `fid_param_value` is
1725
+ specified (not None), an `fid` parameter is passed to `write_dataframe`, causing
1726
+ GDAL to use the column name specified for the FID.
1727
+ """
1728
+ input_gdf = gp.GeoDataFrame(
1729
+ {fid_column: [5]}, geometry=[shapely.Point(0, 0)], crs="epsg:4326"
1730
+ )
1731
+ kwargs = {}
1732
+ if fid_param_value is not None:
1733
+ kwargs["fid"] = fid_param_value
1734
+ path = tmp_path / f"test{ext}"
1735
+
1736
+ write_dataframe(input_gdf, path, use_arrow=use_arrow, **kwargs)
1737
+
1738
+ assert path.exists()
1739
+ output_gdf = read_dataframe(path, fid_as_index=True, use_arrow=use_arrow)
1740
+ output_gdf = output_gdf.reset_index()
1741
+
1742
+ # pyogrio always sets "fid" as index name with `fid_as_index`
1743
+ expected_gdf = input_gdf.rename(columns={fid_column: "fid"})
1744
+ assert_geodataframe_equal(output_gdf, expected_gdf)
1745
+
1746
+
1747
+ @pytest.mark.parametrize("ext", ALL_EXTS)
1748
+ @pytest.mark.requires_arrow_write_api
1749
+ def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
1750
+ input_gdf = read_dataframe(naturalearth_lowres)
1751
+ output_path = tmp_path / f"test{ext}"
1752
+
1753
+ if ext == ".fgb":
1754
+ # For .fgb, spatial_index=False to avoid the rows being reordered
1755
+ write_dataframe(
1756
+ input_gdf, output_path, use_arrow=use_arrow, spatial_index=False
1757
+ )
1758
+ else:
1759
+ write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
1760
+
1761
+ assert output_path.exists()
1762
+ result_gdf = read_dataframe(output_path)
1763
+
1764
+ geometry_types = result_gdf.geometry.type.unique()
1765
+ if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
1766
+ assert list(geometry_types) == ["MultiPolygon"]
1767
+ else:
1768
+ assert set(geometry_types) == {"MultiPolygon", "Polygon"}
1769
+
1770
+ # Coordinates are not precisely equal when written to JSON
1771
+ # dtypes do not necessarily round-trip precisely through JSON
1772
+ is_json = ext in [".geojson", ".geojsonl"]
1773
+ # In .geojsonl the vertices are reordered, so normalize
1774
+ is_jsons = ext == ".geojsonl"
1775
+
1776
+ assert_geodataframe_equal(
1777
+ result_gdf,
1778
+ input_gdf,
1779
+ check_less_precise=is_json,
1780
+ check_index_type=False,
1781
+ check_dtype=not is_json,
1782
+ normalize=is_jsons,
1783
+ )
1784
+
1785
+
1786
+ @pytest.mark.filterwarnings("ignore:.*No SRS set on layer.*")
1787
+ @pytest.mark.parametrize("write_geodf", [True, False])
1788
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS + [".xlsx"] if ext != ".fgb"])
1789
+ @pytest.mark.requires_arrow_write_api
1790
+ def test_write_dataframe_no_geom(
1791
+ request, tmp_path, naturalearth_lowres, write_geodf, ext, use_arrow
1792
+ ):
1793
+ """Test writing a (geo)dataframe without a geometry column.
1794
+
1795
+ FlatGeobuf (.fgb) doesn't seem to support this, and just writes an empty file.
1796
+ """
1797
+ # Prepare test data
1798
+ input_df = read_dataframe(naturalearth_lowres, read_geometry=False)
1799
+ if write_geodf:
1800
+ input_df = gp.GeoDataFrame(input_df)
1801
+
1802
+ output_path = tmp_path / f"test{ext}"
1803
+
1804
+ # A shapefile without a geometry column results in only a .dbf file.
1805
+ if ext == ".shp":
1806
+ output_path = output_path.with_suffix(".dbf")
1807
+
1808
+ # Determine driver
1809
+ driver = DRIVERS[ext] if ext != ".xlsx" else "XLSX"
1810
+
1811
+ write_dataframe(input_df, output_path, use_arrow=use_arrow, driver=driver)
1812
+
1813
+ assert output_path.exists()
1814
+ result_df = read_dataframe(output_path)
1815
+
1816
+ assert isinstance(result_df, pd.DataFrame)
1817
+
1818
+ # some dtypes do not round-trip precisely through these file types
1819
+ check_dtype = ext not in [".geojson", ".geojsonl", ".xlsx"]
1820
+
1821
+ if ext in [".gpkg", ".shp", ".xlsx"]:
1822
+ # These file types return a DataFrame when read.
1823
+ assert not isinstance(result_df, gp.GeoDataFrame)
1824
+ if isinstance(input_df, gp.GeoDataFrame):
1825
+ input_df = pd.DataFrame(input_df)
1826
+
1827
+ pd.testing.assert_frame_equal(
1828
+ result_df, input_df, check_index_type=False, check_dtype=check_dtype
1829
+ )
1830
+ else:
1831
+ # These file types return a GeoDataFrame with None Geometries when read.
1832
+ input_none_geom_gdf = gp.GeoDataFrame(
1833
+ input_df, geometry=np.repeat(None, len(input_df)), crs=4326
1834
+ )
1835
+ assert_geodataframe_equal(
1836
+ result_df,
1837
+ input_none_geom_gdf,
1838
+ check_index_type=False,
1839
+ check_dtype=check_dtype,
1840
+ )
1841
+
1842
+
1843
+ @pytest.mark.requires_arrow_write_api
1844
+ def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):
1845
+ # dataframe writing ignores the index
1846
+ input_gdf = read_dataframe(naturalearth_lowres)
1847
+ input_gdf = input_gdf.set_index("iso_a3")
1848
+
1849
+ output_path = tmp_path / "test.shp"
1850
+ write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
1851
+
1852
+ result_gdf = read_dataframe(output_path)
1853
+ assert isinstance(result_gdf.index, pd.RangeIndex)
1854
+ assert_geodataframe_equal(result_gdf, input_gdf.reset_index(drop=True))
1855
+
1856
+
1857
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
1858
+ @pytest.mark.parametrize(
1859
+ "columns, dtype",
1860
+ [
1861
+ ([], None),
1862
+ (["col_int"], np.int64),
1863
+ (["col_float"], np.float64),
1864
+ (["col_object"], object),
1865
+ ],
1866
+ )
1867
+ @pytest.mark.requires_arrow_write_api
1868
+ def test_write_empty_dataframe(tmp_path, ext, columns, dtype, use_arrow):
1869
+ """Test writing dataframe with no rows.
1870
+
1871
+ With use_arrow, object type columns with no rows are converted to null type columns
1872
+ by pyarrow, but null columns are not supported by GDAL. Added to test fix for #513.
1873
+ """
1874
+ expected = gp.GeoDataFrame(geometry=[], columns=columns, dtype=dtype, crs=4326)
1875
+ filename = tmp_path / f"test{ext}"
1876
+ write_dataframe(expected, filename, use_arrow=use_arrow)
1877
+
1878
+ assert filename.exists()
1879
+ df = read_dataframe(filename, use_arrow=use_arrow)
1880
+
1881
+ # Check result
1882
+ # For older pandas versions, the index is created as Object dtype but read as
1883
+ # RangeIndex, so don't check the index dtype in that case.
1884
+ check_index_type = True if PANDAS_GE_20 else False
1885
+ # with pandas 3+ and reading through arrow, we preserve the string dtype
1886
+ # (no proper dtype inference happens for the empty numpy object dtype arrays)
1887
+ if use_arrow and dtype is object:
1888
+ expected["col_object"] = expected["col_object"].astype("str")
1889
+ assert_geodataframe_equal(df, expected, check_index_type=check_index_type)
1890
+
1891
+
1892
+ def test_write_empty_geometry(tmp_path):
1893
+ expected = gp.GeoDataFrame({"x": [0]}, geometry=from_wkt(["POINT EMPTY"]), crs=4326)
1894
+ filename = tmp_path / "test.gpkg"
1895
+
1896
+ # Check that no warning is raised with GeoSeries.notna()
1897
+ with warnings.catch_warnings():
1898
+ warnings.simplefilter("error", UserWarning)
1899
+ if not HAS_PYPROJ:
1900
+ warnings.filterwarnings("ignore", message="'crs' was not provided.")
1901
+ write_dataframe(expected, filename)
1902
+ assert filename.exists()
1903
+
1904
+ # Xref GH-436: round-tripping possible with GPKG but not others
1905
+ df = read_dataframe(filename)
1906
+ assert_geodataframe_equal(df, expected)
1907
+
1908
+
1909
+ @pytest.mark.requires_arrow_write_api
1910
+ def test_write_None_string_column(tmp_path, use_arrow):
1911
+ """Test pandas object columns with all None values.
1912
+
1913
+ With use_arrow, such columns are converted to null type columns by pyarrow, but null
1914
+ columns are not supported by GDAL. Added to test fix for #513.
1915
+ """
1916
+ gdf = gp.GeoDataFrame({"object_col": [None]}, geometry=[Point(0, 0)], crs=4326)
1917
+ filename = tmp_path / "test.gpkg"
1918
+
1919
+ write_dataframe(gdf, filename, use_arrow=use_arrow)
1920
+ assert filename.exists()
1921
+
1922
+ result_gdf = read_dataframe(filename, use_arrow=use_arrow)
1923
+ if (
1924
+ PANDAS_GE_30 or (PANDAS_GE_23 and pd.options.future.infer_string)
1925
+ ) and use_arrow:
1926
+ assert result_gdf.object_col.dtype == "str"
1927
+ gdf["object_col"] = gdf["object_col"].astype("str")
1928
+ else:
1929
+ assert result_gdf.object_col.dtype == object
1930
+ assert_geodataframe_equal(result_gdf, gdf)
1931
+
1932
+
1933
+ @pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
1934
+ @pytest.mark.requires_arrow_write_api
1935
+ def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
1936
+ # Writing an empty dataframe to .geojsons or .geojsonl logically results in a 0 byte
1937
+ # file, but GDAL isn't able to read those again at the time of writing.
1938
+ # Issue logged here: https://github.com/geopandas/pyogrio/issues/94
1939
+ expected = gp.GeoDataFrame(geometry=[], crs=4326)
1940
+
1941
+ filename = tmp_path / f"test{ext}"
1942
+ write_dataframe(expected, filename, use_arrow=use_arrow)
1943
+
1944
+ assert filename.exists()
1945
+ with pytest.raises(
1946
+ Exception, match=".* not recognized as( being in)? a supported file format."
1947
+ ):
1948
+ _ = read_dataframe(filename, use_arrow=use_arrow)
1949
+
1950
+
1951
+ @pytest.mark.requires_arrow_write_api
1952
+ def test_write_dataframe_gpkg_multiple_layers(tmp_path, naturalearth_lowres, use_arrow):
1953
+ input_gdf = read_dataframe(naturalearth_lowres)
1954
+ filename = tmp_path / "test.gpkg"
1955
+
1956
+ write_dataframe(
1957
+ input_gdf,
1958
+ filename,
1959
+ layer="first",
1960
+ promote_to_multi=True,
1961
+ use_arrow=use_arrow,
1962
+ )
1963
+
1964
+ assert filename.exists()
1965
+ assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])
1966
+
1967
+ write_dataframe(
1968
+ input_gdf,
1969
+ filename,
1970
+ layer="second",
1971
+ promote_to_multi=True,
1972
+ use_arrow=use_arrow,
1973
+ )
1974
+ assert np.array_equal(
1975
+ list_layers(filename),
1976
+ [["first", "MultiPolygon"], ["second", "MultiPolygon"]],
1977
+ )
1978
+
1979
+
1980
+ @pytest.mark.parametrize("ext", ALL_EXTS)
1981
+ @pytest.mark.requires_arrow_write_api
1982
+ def test_write_dataframe_append(request, tmp_path, naturalearth_lowres, ext, use_arrow):
1983
+ if use_arrow and ext.startswith(".geojson"):
1984
+ # Bug in GDAL when appending int64 to GeoJSON
1985
+ # (https://github.com/OSGeo/gdal/issues/9792)
1986
+ request.node.add_marker(
1987
+ pytest.mark.xfail(reason="Bugs with append when writing Arrow to GeoJSON")
1988
+ )
1989
+
1990
+ input_gdf = read_dataframe(naturalearth_lowres)
1991
+ filename = tmp_path / f"test{ext}"
1992
+
1993
+ write_dataframe(input_gdf, filename, use_arrow=use_arrow)
1994
+
1995
+ assert filename.exists()
1996
+ assert len(read_dataframe(filename)) == 177
1997
+
1998
+ write_dataframe(input_gdf, filename, use_arrow=use_arrow, append=True)
1999
+ assert len(read_dataframe(filename)) == 354
2000
+
2001
+
2002
+ @pytest.mark.parametrize("spatial_index", [False, True])
2003
+ @pytest.mark.requires_arrow_write_api
2004
+ def test_write_dataframe_gdal_options(
2005
+ tmp_path, naturalearth_lowres, spatial_index, use_arrow
2006
+ ):
2007
+ df = read_dataframe(naturalearth_lowres)
2008
+
2009
+ outfilename1 = tmp_path / "test1.shp"
2010
+ write_dataframe(
2011
+ df,
2012
+ outfilename1,
2013
+ use_arrow=use_arrow,
2014
+ SPATIAL_INDEX="YES" if spatial_index else "NO",
2015
+ )
2016
+ assert outfilename1.exists() is True
2017
+ index_filename1 = tmp_path / "test1.qix"
2018
+ assert index_filename1.exists() is spatial_index
2019
+
2020
+ # using explicit layer_options instead
2021
+ outfilename2 = tmp_path / "test2.shp"
2022
+ write_dataframe(
2023
+ df,
2024
+ outfilename2,
2025
+ use_arrow=use_arrow,
2026
+ layer_options={"spatial_index": spatial_index},
2027
+ )
2028
+ assert outfilename2.exists() is True
2029
+ index_filename2 = tmp_path / "test2.qix"
2030
+ assert index_filename2.exists() is spatial_index
2031
+
2032
+
2033
+ @pytest.mark.requires_arrow_write_api
2034
+ def test_write_dataframe_gdal_options_unknown(tmp_path, naturalearth_lowres, use_arrow):
2035
+ df = read_dataframe(naturalearth_lowres)
2036
+
2037
+ # geojson has no spatial index, so passing keyword should raise
2038
+ outfilename = tmp_path / "test.geojson"
2039
+ with pytest.raises(ValueError, match="unrecognized option 'SPATIAL_INDEX'"):
2040
+ write_dataframe(df, outfilename, use_arrow=use_arrow, spatial_index=True)
2041
+
2042
+
2043
+ def _get_gpkg_table_names(path):
2044
+ import sqlite3
2045
+
2046
+ con = sqlite3.connect(path)
2047
+ cursor = con.cursor()
2048
+ cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
2049
+ result = cursor.fetchall()
2050
+ return [res[0] for res in result]
2051
+
2052
+
2053
+ @pytest.mark.requires_arrow_write_api
2054
+ def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres, use_arrow):
2055
+ df = read_dataframe(naturalearth_lowres)
2056
+
2057
+ test_default_filename = tmp_path / "test_default.gpkg"
2058
+ write_dataframe(df, test_default_filename, use_arrow=use_arrow)
2059
+ assert "gpkg_ogr_contents" in _get_gpkg_table_names(test_default_filename)
2060
+
2061
+ test_no_contents_filename = tmp_path / "test_no_contents.gpkg"
2062
+ write_dataframe(
2063
+ df, test_no_contents_filename, use_arrow=use_arrow, ADD_GPKG_OGR_CONTENTS="NO"
2064
+ )
2065
+ assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename)
2066
+
2067
+ test_no_contents_filename2 = tmp_path / "test_no_contents2.gpkg"
2068
+ write_dataframe(
2069
+ df,
2070
+ test_no_contents_filename2,
2071
+ use_arrow=use_arrow,
2072
+ dataset_options={"add_gpkg_ogr_contents": False},
2073
+ )
2074
+ assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename2)
2075
+
2076
+
2077
+ @pytest.mark.parametrize(
2078
+ "ext, promote_to_multi, expected_geometry_types, expected_geometry_type",
2079
+ [
2080
+ (".fgb", None, ["MultiPolygon"], "MultiPolygon"),
2081
+ (".fgb", True, ["MultiPolygon"], "MultiPolygon"),
2082
+ (".fgb", False, ["MultiPolygon", "Polygon"], "Unknown"),
2083
+ (".geojson", None, ["MultiPolygon", "Polygon"], "Unknown"),
2084
+ (".geojson", True, ["MultiPolygon"], "MultiPolygon"),
2085
+ (".geojson", False, ["MultiPolygon", "Polygon"], "Unknown"),
2086
+ ],
2087
+ )
2088
+ @pytest.mark.requires_arrow_write_api
2089
+ def test_write_dataframe_promote_to_multi(
2090
+ tmp_path,
2091
+ naturalearth_lowres,
2092
+ ext,
2093
+ promote_to_multi,
2094
+ expected_geometry_types,
2095
+ expected_geometry_type,
2096
+ use_arrow,
2097
+ ):
2098
+ input_gdf = read_dataframe(naturalearth_lowres)
2099
+
2100
+ output_path = tmp_path / f"test_promote{ext}"
2101
+ write_dataframe(
2102
+ input_gdf, output_path, use_arrow=use_arrow, promote_to_multi=promote_to_multi
2103
+ )
2104
+
2105
+ assert output_path.exists()
2106
+ output_gdf = read_dataframe(output_path)
2107
+ geometry_types = sorted(output_gdf.geometry.type.unique())
2108
+ assert geometry_types == expected_geometry_types
2109
+ assert read_info(output_path)["geometry_type"] == expected_geometry_type
2110
+
2111
+
2112
+ @pytest.mark.parametrize(
2113
+ "ext, promote_to_multi, geometry_type, "
2114
+ "expected_geometry_types, expected_geometry_type",
2115
+ [
2116
+ (".fgb", None, "Unknown", ["MultiPolygon"], "Unknown"),
2117
+ (".geojson", False, "Unknown", ["MultiPolygon", "Polygon"], "Unknown"),
2118
+ (".geojson", None, "Unknown", ["MultiPolygon", "Polygon"], "Unknown"),
2119
+ (".geojson", None, "Polygon", ["MultiPolygon", "Polygon"], "Unknown"),
2120
+ (".geojson", None, "MultiPolygon", ["MultiPolygon", "Polygon"], "Unknown"),
2121
+ (".geojson", None, "Point", ["MultiPolygon", "Polygon"], "Unknown"),
2122
+ (".geojson", True, "Unknown", ["MultiPolygon"], "MultiPolygon"),
2123
+ (".gpkg", False, "Unknown", ["MultiPolygon", "Polygon"], "Unknown"),
2124
+ (".gpkg", None, "Unknown", ["MultiPolygon"], "Unknown"),
2125
+ (".gpkg", None, "Polygon", ["MultiPolygon"], "Polygon"),
2126
+ (".gpkg", None, "MultiPolygon", ["MultiPolygon"], "MultiPolygon"),
2127
+ (".gpkg", None, "Point", ["MultiPolygon"], "Point"),
2128
+ (".gpkg", True, "Unknown", ["MultiPolygon"], "Unknown"),
2129
+ (".shp", False, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
2130
+ (".shp", None, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
2131
+ (".shp", None, "Polygon", ["MultiPolygon", "Polygon"], "Polygon"),
2132
+ (".shp", None, "MultiPolygon", ["MultiPolygon", "Polygon"], "Polygon"),
2133
+ (".shp", True, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
2134
+ ],
2135
+ )
2136
+ @pytest.mark.requires_arrow_write_api
2137
+ def test_write_dataframe_promote_to_multi_layer_geom_type(
2138
+ tmp_path,
2139
+ naturalearth_lowres,
2140
+ ext,
2141
+ promote_to_multi,
2142
+ geometry_type,
2143
+ expected_geometry_types,
2144
+ expected_geometry_type,
2145
+ use_arrow,
2146
+ ):
2147
+ input_gdf = read_dataframe(naturalearth_lowres)
2148
+
2149
+ output_path = tmp_path / f"test_promote_layer_geom_type{ext}"
2150
+
2151
+ if ext == ".gpkg" and geometry_type in ("Polygon", "Point"):
2152
+ ctx = pytest.warns(
2153
+ RuntimeWarning, match="A geometry of type MULTIPOLYGON is inserted"
2154
+ )
2155
+ else:
2156
+ ctx = contextlib.nullcontext()
2157
+
2158
+ with ctx:
2159
+ write_dataframe(
2160
+ input_gdf,
2161
+ output_path,
2162
+ use_arrow=use_arrow,
2163
+ promote_to_multi=promote_to_multi,
2164
+ geometry_type=geometry_type,
2165
+ )
2166
+
2167
+ assert output_path.exists()
2168
+ output_gdf = read_dataframe(output_path)
2169
+ geometry_types = sorted(output_gdf.geometry.type.unique())
2170
+ assert geometry_types == expected_geometry_types
2171
+ assert read_info(output_path)["geometry_type"] == expected_geometry_type
2172
+
2173
+
2174
+ @pytest.mark.parametrize(
2175
+ "ext, promote_to_multi, geometry_type, expected_raises_match",
2176
+ [
2177
+ (".fgb", False, "MultiPolygon", "Mismatched geometry type"),
2178
+ (".fgb", False, "Polygon", "Mismatched geometry type"),
2179
+ (".fgb", None, "Point", "Mismatched geometry type"),
2180
+ (".fgb", None, "Polygon", "Mismatched geometry type"),
2181
+ (
2182
+ ".shp",
2183
+ None,
2184
+ "Point",
2185
+ "Could not add feature to layer at index|Error while writing batch to OGR "
2186
+ "layer",
2187
+ ),
2188
+ ],
2189
+ )
2190
+ @pytest.mark.requires_arrow_write_api
2191
+ def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
2192
+ tmp_path,
2193
+ naturalearth_lowres,
2194
+ ext,
2195
+ promote_to_multi,
2196
+ geometry_type,
2197
+ expected_raises_match,
2198
+ use_arrow,
2199
+ ):
2200
+ input_gdf = read_dataframe(naturalearth_lowres)
2201
+
2202
+ output_path = tmp_path / f"test{ext}"
2203
+ with pytest.raises((FeatureError, DataLayerError), match=expected_raises_match):
2204
+ write_dataframe(
2205
+ input_gdf,
2206
+ output_path,
2207
+ use_arrow=use_arrow,
2208
+ promote_to_multi=promote_to_multi,
2209
+ geometry_type=geometry_type,
2210
+ )
2211
+
2212
+
2213
+ @pytest.mark.requires_arrow_write_api
2214
+ def test_write_dataframe_layer_geom_type_invalid(
2215
+ tmp_path, naturalearth_lowres, use_arrow
2216
+ ):
2217
+ df = read_dataframe(naturalearth_lowres)
2218
+
2219
+ filename = tmp_path / "test.geojson"
2220
+ with pytest.raises(
2221
+ GeometryError, match="Geometry type is not supported: NotSupported"
2222
+ ):
2223
+ write_dataframe(df, filename, use_arrow=use_arrow, geometry_type="NotSupported")
2224
+
2225
+
2226
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".shp"])
2227
+ @pytest.mark.requires_arrow_write_api
2228
+ def test_write_dataframe_truly_mixed(tmp_path, ext, use_arrow):
2229
+ geometry = [
2230
+ shapely.Point(0, 0),
2231
+ shapely.LineString([(0, 0), (1, 1)]),
2232
+ shapely.box(0, 0, 1, 1),
2233
+ shapely.MultiPoint([shapely.Point(1, 1), shapely.Point(2, 2)]),
2234
+ shapely.MultiLineString(
2235
+ [shapely.LineString([(1, 1), (2, 2)]), shapely.LineString([(2, 2), (3, 3)])]
2236
+ ),
2237
+ shapely.MultiPolygon([shapely.box(1, 1, 2, 2), shapely.box(2, 2, 3, 3)]),
2238
+ ]
2239
+
2240
+ df = gp.GeoDataFrame(
2241
+ {"col": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]}, geometry=geometry, crs="EPSG:4326"
2242
+ )
2243
+
2244
+ filename = tmp_path / f"test{ext}"
2245
+
2246
+ if ext == ".fgb":
2247
+ # For .fgb, spatial_index=False to avoid the rows being reordered
2248
+ write_dataframe(df, filename, use_arrow=use_arrow, spatial_index=False)
2249
+ else:
2250
+ write_dataframe(df, filename, use_arrow=use_arrow)
2251
+
2252
+ # Drivers that support mixed geometries will default to "Unknown" geometry type
2253
+ assert read_info(filename)["geometry_type"] == "Unknown"
2254
+ result = read_dataframe(filename)
2255
+ assert_geodataframe_equal(result, df, check_geom_type=True)
2256
+
2257
+
2258
+ @pytest.mark.requires_arrow_write_api
2259
+ def test_write_dataframe_truly_mixed_invalid(tmp_path, use_arrow):
2260
+ # Shapefile doesn't support generic "Geometry" / "Unknown" type
2261
+ # for mixed geometries
2262
+
2263
+ df = gp.GeoDataFrame(
2264
+ {"col": [1.0, 2.0, 3.0]},
2265
+ geometry=[
2266
+ shapely.Point(0, 0),
2267
+ shapely.LineString([(0, 0), (1, 1)]),
2268
+ shapely.box(0, 0, 1, 1),
2269
+ ],
2270
+ crs="EPSG:4326",
2271
+ )
2272
+
2273
+ # ensure error message from GDAL is included
2274
+ msg = (
2275
+ "Could not add feature to layer at index 1: Attempt to "
2276
+ r"write non-point \(LINESTRING\) geometry to point shapefile."
2277
+ # DataLayerError when using Arrow
2278
+ "|Error while writing batch to OGR layer: Attempt to "
2279
+ r"write non-point \(LINESTRING\) geometry to point shapefile."
2280
+ )
2281
+ with pytest.raises((FeatureError, DataLayerError), match=msg):
2282
+ write_dataframe(df, tmp_path / "test.shp", use_arrow=use_arrow)
2283
+
2284
+
2285
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".fgb"])
2286
+ @pytest.mark.parametrize(
2287
+ "geoms",
2288
+ [
2289
+ [None, shapely.Point(1, 1)],
2290
+ [shapely.Point(1, 1), None],
2291
+ [None, shapely.Point(1, 1, 2)],
2292
+ [None, None],
2293
+ ],
2294
+ )
2295
+ @pytest.mark.requires_arrow_write_api
2296
+ def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext, use_arrow):
2297
+ filename = tmp_path / f"test{ext}"
2298
+
2299
+ df = gp.GeoDataFrame({"col": [1.0, 2.0]}, geometry=geoms, crs="EPSG:4326")
2300
+ write_dataframe(df, filename, use_arrow=use_arrow)
2301
+ result = read_dataframe(filename)
2302
+ assert_geodataframe_equal(result, df)
2303
+
2304
+
2305
+ @pytest.mark.filterwarnings(
2306
+ "ignore: You will likely lose important projection information"
2307
+ )
2308
+ @pytest.mark.requires_arrow_write_api
2309
+ @requires_pyproj
2310
+ def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
2311
+ df = read_dataframe(naturalearth_lowres_all_ext)
2312
+ # project Belgium to a custom Albers Equal Area projection
2313
+ expected = (
2314
+ df.loc[df.name == "Belgium"]
2315
+ .reset_index(drop=True)
2316
+ .to_crs("+proj=aea +lat_1=49.5 +lat_2=51.5 +lon_0=4.3")
2317
+ )
2318
+ filename = tmp_path / "test.shp"
2319
+ write_dataframe(expected, filename, use_arrow=use_arrow)
2320
+
2321
+ assert filename.exists()
2322
+
2323
+ df = read_dataframe(filename)
2324
+
2325
+ crs = df.crs.to_dict()
2326
+ assert crs["lat_1"] == 49.5
2327
+ assert crs["lat_2"] == 51.5
2328
+ assert crs["lon_0"] == 4.3
2329
+ assert df.crs.equals(expected.crs)
2330
+
2331
+
2332
+ @pytest.mark.parametrize("ext", [".gpkg.zip", ".shp.zip", ".shz"])
2333
+ @pytest.mark.requires_arrow_write_api
2334
+ def test_write_read_zipped_ext(tmp_path, naturalearth_lowres, ext, use_arrow):
2335
+ """Run a basic read and write test on some extra (zipped) extensions."""
2336
+ if ext == ".gpkg.zip" and not GDAL_GE_37:
2337
+ pytest.skip(".gpkg.zip support requires GDAL >= 3.7")
2338
+
2339
+ input_gdf = read_dataframe(naturalearth_lowres)
2340
+ output_path = tmp_path / f"test{ext}"
2341
+
2342
+ write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
2343
+
2344
+ assert output_path.exists()
2345
+ result_gdf = read_dataframe(output_path)
2346
+
2347
+ geometry_types = result_gdf.geometry.type.unique()
2348
+ if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
2349
+ assert list(geometry_types) == ["MultiPolygon"]
2350
+ else:
2351
+ assert set(geometry_types) == {"MultiPolygon", "Polygon"}
2352
+
2353
+ assert_geodataframe_equal(result_gdf, input_gdf, check_index_type=False)
2354
+
2355
+
2356
+ def test_write_read_mixed_column_values(tmp_path):
2357
+ # use_arrow=True is tested separately below
2358
+ mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
2359
+ geoms = [shapely.Point(0, 0) for _ in mixed_values]
2360
+ test_gdf = gp.GeoDataFrame(
2361
+ {"geometry": geoms, "mixed": mixed_values}, crs="epsg:31370"
2362
+ )
2363
+ output_path = tmp_path / "test_write_mixed_column.gpkg"
2364
+ write_dataframe(test_gdf, output_path)
2365
+ output_gdf = read_dataframe(output_path)
2366
+ assert len(test_gdf) == len(output_gdf)
2367
+ # mixed values as object dtype are currently written as strings
2368
+ # (but preserving nulls)
2369
+ expected = pd.Series(
2370
+ [str(value) if value not in (None, np.nan) else None for value in mixed_values],
2371
+ name="mixed",
2372
+ )
2373
+ assert_series_equal(output_gdf["mixed"], expected)
2374
+
2375
+
2376
+ @requires_arrow_write_api
2377
+ def test_write_read_mixed_column_values_arrow(tmp_path):
2378
+ # Arrow cannot represent a column of mixed types
2379
+ mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
2380
+ geoms = [shapely.Point(0, 0) for _ in mixed_values]
2381
+ test_gdf = gp.GeoDataFrame(
2382
+ {"geometry": geoms, "mixed": mixed_values}, crs="epsg:31370"
2383
+ )
2384
+ output_path = tmp_path / "test_write_mixed_column.gpkg"
2385
+ with pytest.raises(TypeError, match=".*Conversion failed for column"):
2386
+ write_dataframe(test_gdf, output_path, use_arrow=True)
2387
+
2388
+
2389
+ @pytest.mark.requires_arrow_write_api
2390
+ def test_write_read_null(tmp_path, use_arrow):
2391
+ output_path = tmp_path / "test_write_nan.gpkg"
2392
+ geom = shapely.Point(0, 0)
2393
+ test_data = {
2394
+ "geometry": [geom, geom, geom],
2395
+ "float64": [1.0, None, np.nan],
2396
+ "object_str": ["test", None, np.nan],
2397
+ }
2398
+ test_gdf = gp.GeoDataFrame(test_data, crs="epsg:31370")
2399
+ write_dataframe(test_gdf, output_path, use_arrow=use_arrow)
2400
+ result_gdf = read_dataframe(output_path)
2401
+ assert len(test_gdf) == len(result_gdf)
2402
+ assert result_gdf["float64"][0] == 1.0
2403
+ assert pd.isna(result_gdf["float64"][1])
2404
+ assert pd.isna(result_gdf["float64"][2])
2405
+ assert result_gdf["object_str"][0] == "test"
2406
+ assert pd.isna(result_gdf["object_str"][1])
2407
+ assert pd.isna(result_gdf["object_str"][2])
2408
+
2409
+
2410
+ @pytest.mark.requires_arrow_write_api
2411
+ def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow):
2412
+ path, _ = naturalearth_lowres_vsi
2413
+ mem_path = f"/vsimem/{path.name}"
2414
+
2415
+ input = read_dataframe(path, use_arrow=use_arrow)
2416
+ assert len(input) == 177
2417
+
2418
+ try:
2419
+ write_dataframe(input, mem_path, use_arrow=use_arrow)
2420
+ result = read_dataframe(mem_path, use_arrow=use_arrow)
2421
+ assert len(result) == 177
2422
+ finally:
2423
+ vsi_unlink(mem_path)
2424
+
2425
+
2426
+ @pytest.mark.parametrize(
2427
+ "wkt,geom_types",
2428
+ [
2429
+ ("Point Z (0 0 0)", ["2.5D Point", "Point Z"]),
2430
+ ("LineString Z (0 0 0, 1 1 0)", ["2.5D LineString", "LineString Z"]),
2431
+ ("Polygon Z ((0 0 0, 0 1 0, 1 1 0, 0 0 0))", ["2.5D Polygon", "Polygon Z"]),
2432
+ ("MultiPoint Z (0 0 0, 1 1 0)", ["2.5D MultiPoint", "MultiPoint Z"]),
2433
+ (
2434
+ "MultiLineString Z ((0 0 0, 1 1 0), (2 2 2, 3 3 2))",
2435
+ ["2.5D MultiLineString", "MultiLineString Z"],
2436
+ ),
2437
+ (
2438
+ "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))", # noqa: E501
2439
+ ["2.5D MultiPolygon", "MultiPolygon Z"],
2440
+ ),
2441
+ (
2442
+ "GeometryCollection Z (Point Z (0 0 0))",
2443
+ ["2.5D GeometryCollection", "GeometryCollection Z"],
2444
+ ),
2445
+ ],
2446
+ )
2447
+ @pytest.mark.requires_arrow_write_api
2448
+ def test_write_geometry_z_types(tmp_path, wkt, geom_types, use_arrow):
2449
+ filename = tmp_path / "test.fgb"
2450
+ gdf = gp.GeoDataFrame(geometry=from_wkt([wkt]), crs="EPSG:4326")
2451
+ for geom_type in geom_types:
2452
+ write_dataframe(gdf, filename, use_arrow=use_arrow, geometry_type=geom_type)
2453
+ df = read_dataframe(filename)
2454
+ assert_geodataframe_equal(df, gdf)
2455
+
2456
+
2457
+ @pytest.mark.parametrize("ext", ALL_EXTS)
2458
+ @pytest.mark.parametrize(
2459
+ "test_descr, exp_geometry_type, mixed_dimensions, wkt",
2460
+ [
2461
+ ("1 Point Z", "Point Z", False, ["Point Z (0 0 0)"]),
2462
+ ("1 LineString Z", "LineString Z", False, ["LineString Z (0 0 0, 1 1 0)"]),
2463
+ (
2464
+ "1 Polygon Z",
2465
+ "Polygon Z",
2466
+ False,
2467
+ ["Polygon Z ((0 0 0, 0 1 0, 1 1 0, 0 0 0))"],
2468
+ ),
2469
+ ("1 MultiPoint Z", "MultiPoint Z", False, ["MultiPoint Z (0 0 0, 1 1 0)"]),
2470
+ (
2471
+ "1 MultiLineString Z",
2472
+ "MultiLineString Z",
2473
+ False,
2474
+ ["MultiLineString Z ((0 0 0, 1 1 0), (2 2 2, 3 3 2))"],
2475
+ ),
2476
+ (
2477
+ "1 MultiLinePolygon Z",
2478
+ "MultiPolygon Z",
2479
+ False,
2480
+ [
2481
+ "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))" # noqa: E501
2482
+ ],
2483
+ ),
2484
+ (
2485
+ "1 GeometryCollection Z",
2486
+ "GeometryCollection Z",
2487
+ False,
2488
+ ["GeometryCollection Z (Point Z (0 0 0))"],
2489
+ ),
2490
+ ("Point Z + Point", "Point Z", True, ["Point Z (0 0 0)", "Point (0 0)"]),
2491
+ ("Point Z + None", "Point Z", False, ["Point Z (0 0 0)", None]),
2492
+ (
2493
+ "Point Z + LineString Z",
2494
+ "Unknown",
2495
+ False,
2496
+ ["LineString Z (0 0 0, 1 1 0)", "Point Z (0 0 0)"],
2497
+ ),
2498
+ (
2499
+ "Point Z + LineString",
2500
+ "Unknown",
2501
+ True,
2502
+ ["LineString (0 0, 1 1)", "Point Z (0 0 0)"],
2503
+ ),
2504
+ ],
2505
+ )
2506
+ @pytest.mark.requires_arrow_write_api
2507
+ def test_write_geometry_z_types_auto(
2508
+ tmp_path, ext, test_descr, exp_geometry_type, mixed_dimensions, wkt, use_arrow
2509
+ ):
2510
+ # Shapefile has some different behaviour than other file types
2511
+ if ext == ".shp":
2512
+ if exp_geometry_type in ("GeometryCollection Z", "Unknown"):
2513
+ pytest.skip(f"ext {ext} doesn't support {exp_geometry_type}")
2514
+ elif exp_geometry_type == "MultiLineString Z":
2515
+ exp_geometry_type = "LineString Z"
2516
+ elif exp_geometry_type == "MultiPolygon Z":
2517
+ exp_geometry_type = "Polygon Z"
2518
+
2519
+ column_data = {}
2520
+ column_data["test_descr"] = [test_descr] * len(wkt)
2521
+ column_data["idx"] = [str(idx) for idx in range(len(wkt))]
2522
+ gdf = gp.GeoDataFrame(column_data, geometry=from_wkt(wkt), crs="EPSG:4326")
2523
+ filename = tmp_path / f"test{ext}"
2524
+
2525
+ if ext == ".fgb":
2526
+ # writing empty / null geometries is not allowed by FlatGeobuf for
2527
+ # GDAL >= 3.6.4; previously they were simply not written
2528
+ gdf = gdf.loc[~(gdf.geometry.isna() | gdf.geometry.is_empty)]
2529
+
2530
+ if mixed_dimensions and DRIVERS[ext] in DRIVERS_NO_MIXED_DIMENSIONS:
2531
+ with pytest.raises(
2532
+ DataSourceError,
2533
+ match=("Mixed 2D and 3D coordinates are not supported by"),
2534
+ ):
2535
+ write_dataframe(gdf, filename, use_arrow=use_arrow)
2536
+ return
2537
+ else:
2538
+ write_dataframe(gdf, filename, use_arrow=use_arrow)
2539
+
2540
+ info = read_info(filename)
2541
+ assert info["geometry_type"] == exp_geometry_type
2542
+
2543
+ result_gdf = read_dataframe(filename)
2544
+ if ext == ".geojsonl":
2545
+ result_gdf.crs = "EPSG:4326"
2546
+
2547
+ assert_geodataframe_equal(gdf, result_gdf)
2548
+
2549
+
2550
+ @pytest.mark.parametrize(
2551
+ "on_invalid, message, expected_wkt",
2552
+ [
2553
+ (
2554
+ "warn",
2555
+ "Invalid WKB: geometry is returned as None. IllegalArgumentException: "
2556
+ "Points of LinearRing do not form a closed linestring",
2557
+ None,
2558
+ ),
2559
+ ("raise", "Points of LinearRing do not form a closed linestring", None),
2560
+ ("ignore", None, None),
2561
+ ("fix", None, "POLYGON ((0 0, 0 1, 0 0))"),
2562
+ ],
2563
+ )
2564
+ @pytest.mark.filterwarnings("ignore:Non closed ring detected:RuntimeWarning")
2565
+ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message, expected_wkt):
2566
+ if on_invalid == "fix" and not SHAPELY_GE_21:
2567
+ pytest.skip("on_invalid=fix not available for Shapely < 2.1")
2568
+
2569
+ if on_invalid == "raise":
2570
+ handler = pytest.raises(shapely.errors.GEOSException, match=message)
2571
+ elif on_invalid == "warn":
2572
+ handler = pytest.warns(match=message)
2573
+ elif on_invalid in ("fix", "ignore"):
2574
+ handler = contextlib.nullcontext()
2575
+ else:
2576
+ raise ValueError(f"unknown value for on_invalid: {on_invalid}")
2577
+
2578
+ # create a GeoJSON file with an invalid exterior ring
2579
+ invalid_geojson = """{
2580
+ "type": "FeatureCollection",
2581
+ "features": [
2582
+ {
2583
+ "type": "Feature",
2584
+ "properties": {},
2585
+ "geometry": {
2586
+ "type": "Polygon",
2587
+ "coordinates": [ [ [0, 0], [0, 1] ] ]
2588
+ }
2589
+ }
2590
+ ]
2591
+ }"""
2592
+
2593
+ filename = tmp_path / "test.geojson"
2594
+ with open(filename, "w") as f:
2595
+ _ = f.write(invalid_geojson)
2596
+
2597
+ with handler:
2598
+ df = read_dataframe(
2599
+ filename,
2600
+ use_arrow=use_arrow,
2601
+ on_invalid=on_invalid,
2602
+ )
2603
+ if expected_wkt is None:
2604
+ assert df.geometry.iloc[0] is None
2605
+ else:
2606
+ assert df.geometry.iloc[0].wkt == expected_wkt
2607
+
2608
+
2609
+ def test_read_multisurface(multisurface_file, use_arrow):
2610
+ if use_arrow:
2611
+ # TODO: revisit once https://github.com/geopandas/pyogrio/issues/478
2612
+ # is resolved.
2613
+ pytest.skip("Shapely + GEOS 3.13 crashes in from_wkb for this case")
2614
+
2615
+ with pytest.raises(shapely.errors.GEOSException):
2616
+ # TODO(Arrow)
2617
+ # shapely fails parsing the WKB
2618
+ read_dataframe(multisurface_file, use_arrow=True)
2619
+ else:
2620
+ df = read_dataframe(multisurface_file)
2621
+
2622
+ # MultiSurface should be converted to MultiPolygon
2623
+ assert df.geometry.type.tolist() == ["MultiPolygon"]
2624
+
2625
+
2626
+ def test_read_dataset_kwargs(nested_geojson_file, use_arrow):
2627
+ # by default, nested data are not flattened
2628
+ df = read_dataframe(nested_geojson_file, use_arrow=use_arrow)
2629
+
2630
+ expected = gp.GeoDataFrame(
2631
+ {
2632
+ "top_level": ["A"],
2633
+ "intermediate_level": [{"bottom_level": "B"}],
2634
+ },
2635
+ geometry=[shapely.Point(0, 0)],
2636
+ crs="EPSG:4326",
2637
+ )
2638
+ if GDAL_GE_311 and use_arrow:
2639
+ # GDAL 3.11 started to use the json extension type, which is not yet handled
2640
+ # correctly in the arrow->pandas conversion (using object instead of str dtype)
2641
+ expected["intermediate_level"] = expected["intermediate_level"].astype(object)
2642
+
2643
+ assert_geodataframe_equal(df, expected)
2644
+
2645
+ df = read_dataframe(
2646
+ nested_geojson_file, use_arrow=use_arrow, FLATTEN_NESTED_ATTRIBUTES="YES"
2647
+ )
2648
+
2649
+ expected = gp.GeoDataFrame(
2650
+ {
2651
+ "top_level": ["A"],
2652
+ "intermediate_level_bottom_level": ["B"],
2653
+ },
2654
+ geometry=[shapely.Point(0, 0)],
2655
+ crs="EPSG:4326",
2656
+ )
2657
+
2658
+ assert_geodataframe_equal(df, expected)
2659
+
2660
+
2661
+ def test_read_invalid_dataset_kwargs(naturalearth_lowres, use_arrow):
2662
+ with pytest.warns(RuntimeWarning, match="does not support open option INVALID"):
2663
+ read_dataframe(naturalearth_lowres, use_arrow=use_arrow, INVALID="YES")
2664
+
2665
+
2666
+ @pytest.mark.requires_arrow_write_api
2667
+ def test_write_nullable_dtypes(tmp_path, use_arrow):
2668
+ path = tmp_path / "test_nullable_dtypes.gpkg"
2669
+ test_data = {
2670
+ "col1": pd.Series([1, 2, 3], dtype="int64"),
2671
+ "col2": pd.Series([1, 2, None], dtype="Int64"),
2672
+ "col3": pd.Series([0.1, None, 0.3], dtype="Float32"),
2673
+ "col4": pd.Series([True, False, None], dtype="boolean"),
2674
+ "col5": pd.Series(["a", None, "b"], dtype="string"),
2675
+ }
2676
+ input_gdf = gp.GeoDataFrame(
2677
+ test_data, geometry=[shapely.Point(0, 0)] * 3, crs="epsg:31370"
2678
+ )
2679
+ write_dataframe(input_gdf, path, use_arrow=use_arrow)
2680
+ output_gdf = read_dataframe(path)
2681
+ # We read it back as default (non-nullable) numpy dtypes, so we cast
2682
+ # to those for the expected result
2683
+ expected = input_gdf.copy()
2684
+ expected["col2"] = expected["col2"].astype("float64")
2685
+ expected["col3"] = expected["col3"].astype("float32")
2686
+ expected["col4"] = expected["col4"].astype("float64")
2687
+ expected["col5"] = expected["col5"].astype("str")
2688
+ expected.loc[1, "col5"] = None # pandas converts to pd.NA on line above
2689
+ assert_geodataframe_equal(output_gdf, expected)
2690
+
2691
+
2692
+ @pytest.mark.parametrize(
2693
+ "metadata_type", ["dataset_metadata", "layer_metadata", "metadata"]
2694
+ )
2695
+ @pytest.mark.requires_arrow_write_api
2696
+ def test_metadata_io(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
2697
+ metadata = {"level": metadata_type}
2698
+
2699
+ df = read_dataframe(naturalearth_lowres)
2700
+
2701
+ filename = tmp_path / "test.gpkg"
2702
+ write_dataframe(df, filename, use_arrow=use_arrow, **{metadata_type: metadata})
2703
+
2704
+ metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type
2705
+
2706
+ assert read_info(filename)[metadata_key] == metadata
2707
+
2708
+
2709
+ @pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
2710
+ @pytest.mark.parametrize(
2711
+ "metadata",
2712
+ [
2713
+ {1: 2},
2714
+ {"key": None},
2715
+ {"key": 1},
2716
+ ],
2717
+ )
2718
+ @pytest.mark.requires_arrow_write_api
2719
+ def test_invalid_metadata(
2720
+ tmp_path, naturalearth_lowres, metadata_type, metadata, use_arrow
2721
+ ):
2722
+ df = read_dataframe(naturalearth_lowres)
2723
+ with pytest.raises(ValueError, match="must be a string"):
2724
+ write_dataframe(
2725
+ df, tmp_path / "test.gpkg", use_arrow=use_arrow, **{metadata_type: metadata}
2726
+ )
2727
+
2728
+
2729
+ @pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
2730
+ @pytest.mark.requires_arrow_write_api
2731
+ def test_metadata_unsupported(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
2732
+ """metadata is silently ignored"""
2733
+
2734
+ filename = tmp_path / "test.geojson"
2735
+ write_dataframe(
2736
+ read_dataframe(naturalearth_lowres),
2737
+ filename,
2738
+ use_arrow=use_arrow,
2739
+ **{metadata_type: {"key": "value"}},
2740
+ )
2741
+
2742
+ metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type
2743
+
2744
+ assert read_info(filename)[metadata_key] is None
2745
+
2746
+
2747
+ @pytest.mark.skipif(not PANDAS_GE_15, reason="ArrowDtype requires pandas 1.5+")
2748
+ def test_read_dataframe_arrow_dtypes(tmp_path):
2749
+ # https://github.com/geopandas/pyogrio/issues/319 - ensure arrow binary
2750
+ # column can be converted with from_wkb in case of missing values
2751
+ pytest.importorskip("pyarrow")
2752
+ filename = tmp_path / "test.gpkg"
2753
+ df = gp.GeoDataFrame(
2754
+ {"col": [1.0, 2.0]}, geometry=[Point(1, 1), None], crs="EPSG:4326"
2755
+ )
2756
+ write_dataframe(df, filename)
2757
+
2758
+ result = read_dataframe(
2759
+ filename,
2760
+ use_arrow=True,
2761
+ arrow_to_pandas_kwargs={
2762
+ "types_mapper": lambda pa_dtype: pd.ArrowDtype(pa_dtype)
2763
+ },
2764
+ )
2765
+ assert isinstance(result["col"].dtype, pd.ArrowDtype)
2766
+ result["col"] = result["col"].astype("float64")
2767
+ assert_geodataframe_equal(result, df)
2768
+
2769
+
2770
+ @requires_pyarrow_api
2771
+ @pytest.mark.skipif(
2772
+ __gdal_version__ < (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
2773
+ )
2774
+ @pytest.mark.parametrize("ext", ALL_EXTS)
2775
+ def test_arrow_bool_roundtrip(tmp_path, ext):
2776
+ filename = tmp_path / f"test{ext}"
2777
+
2778
+ kwargs = {}
2779
+
2780
+ if ext == ".fgb":
2781
+ # For .fgb, use spatial_index=False to avoid the rows being reordered
2782
+ kwargs["spatial_index"] = False
2783
+
2784
+ df = gp.GeoDataFrame(
2785
+ {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
2786
+ crs="EPSG:4326",
2787
+ )
2788
+
2789
+ write_dataframe(df, filename, **kwargs)
2790
+ result = read_dataframe(filename, use_arrow=True)
2791
+ # Shapefiles do not support bool columns; these are returned as int32
2792
+ assert_geodataframe_equal(result, df, check_dtype=ext != ".shp")
2793
+
2794
+
2795
+ @requires_pyarrow_api
2796
+ @pytest.mark.skipif(
2797
+ __gdal_version__ >= (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
2798
+ )
2799
+ @pytest.mark.parametrize("ext", ALL_EXTS)
2800
+ def test_arrow_bool_exception(tmp_path, ext):
2801
+ filename = tmp_path / f"test{ext}"
2802
+
2803
+ df = gp.GeoDataFrame(
2804
+ {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
2805
+ crs="EPSG:4326",
2806
+ )
2807
+
2808
+ write_dataframe(df, filename)
2809
+
2810
+ if ext in {".fgb", ".gpkg"}:
2811
+ # only raise exception for GPKG / FGB
2812
+ with pytest.raises(
2813
+ RuntimeError,
2814
+ match="GDAL < 3.8.3 does not correctly read boolean data values using "
2815
+ "the Arrow API",
2816
+ ):
2817
+ read_dataframe(filename, use_arrow=True)
2818
+
2819
+ # do not raise exception if no bool columns are read
2820
+ read_dataframe(filename, use_arrow=True, columns=[])
2821
+
2822
+ else:
2823
+ _ = read_dataframe(filename, use_arrow=True)
2824
+
2825
+
2826
+ @requires_pyarrow_api
2827
+ def test_arrow_enable_with_environment_variable(tmp_path):
2828
+ """Test if arrow can be enabled via an environment variable."""
2829
+ # Windows-1252 / Western European
2830
+ encoding = "CP1252"
2831
+ text = "ÿ"
2832
+ test_path = tmp_path / "test.gpkg"
2833
+
2834
+ df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
2835
+ write_dataframe(df, test_path, encoding=encoding)
2836
+
2837
+ # Without arrow, specifying the encoding is supported
2838
+ result = read_dataframe(test_path, encoding="cp1252")
2839
+ assert result is not None
2840
+
2841
+ # With arrow enabled, specifying the encoding is not supported
2842
+ with use_arrow_context():
2843
+ with pytest.raises(
2844
+ ValueError, match="non-UTF-8 encoding is not supported for Arrow"
2845
+ ):
2846
+ _ = read_dataframe(test_path, encoding="cp1252")
2847
+
2848
+
2849
+ @pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
2850
+ @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
2851
+ def test_write_memory(naturalearth_lowres, driver):
2852
+ df = read_dataframe(naturalearth_lowres)
2853
+
2854
+ buffer = BytesIO()
2855
+ write_dataframe(df, buffer, driver=driver, layer="test")
2856
+
2857
+ assert len(buffer.getbuffer()) > 0
2858
+
2859
+ actual = read_dataframe(buffer)
2860
+ assert len(actual) == len(df)
2861
+
2862
+ is_json = driver == "GeoJSON"
2863
+
2864
+ assert_geodataframe_equal(
2865
+ actual,
2866
+ df,
2867
+ check_less_precise=is_json,
2868
+ check_index_type=False,
2869
+ check_dtype=not is_json,
2870
+ )
2871
+
2872
+ # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
2873
+ assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
2874
+
2875
+
2876
+ def test_write_memory_driver_required(naturalearth_lowres):
2877
+ df = read_dataframe(naturalearth_lowres)
2878
+
2879
+ buffer = BytesIO()
2880
+
2881
+ with pytest.raises(
2882
+ ValueError,
2883
+ match="driver must be provided to write to in-memory file",
2884
+ ):
2885
+ write_dataframe(df.head(1), buffer, driver=None, layer="test")
2886
+
2887
+ # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
2888
+ assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
2889
+
2890
+
2891
+ @pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
2892
+ def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
2893
+ df = read_dataframe(naturalearth_lowres)
2894
+
2895
+ buffer = BytesIO()
2896
+
2897
+ with pytest.raises(
2898
+ ValueError, match=f"writing to in-memory file is not supported for {driver}"
2899
+ ):
2900
+ write_dataframe(df, buffer, driver=driver, layer="test")
2901
+
2902
+ # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
2903
+ assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
2904
+
2905
+
2906
+ @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
2907
+ def test_write_memory_append_unsupported(naturalearth_lowres, driver):
2908
+ df = read_dataframe(naturalearth_lowres)
2909
+
2910
+ buffer = BytesIO()
2911
+
2912
+ with pytest.raises(
2913
+ NotImplementedError, match="append is not supported for in-memory files"
2914
+ ):
2915
+ write_dataframe(df.head(1), buffer, driver=driver, layer="test", append=True)
2916
+
2917
+ # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
2918
+ assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
2919
+
2920
+
2921
+ def test_write_memory_existing_unsupported(naturalearth_lowres):
2922
+ df = read_dataframe(naturalearth_lowres)
2923
+
2924
+ buffer = BytesIO(b"0000")
2925
+ with pytest.raises(
2926
+ NotImplementedError,
2927
+ match="writing to existing in-memory object is not supported",
2928
+ ):
2929
+ write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test")
2930
+
2931
+ # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
2932
+ assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
2933
+
2934
+
2935
+ def test_write_open_file_handle(tmp_path, naturalearth_lowres):
2936
+ """Verify that writing to an open file handle is not currently supported"""
2937
+
2938
+ df = read_dataframe(naturalearth_lowres)
2939
+
2940
+ # verify it fails for regular file handle
2941
+ with pytest.raises(
2942
+ NotImplementedError, match="writing to an open file handle is not yet supported"
2943
+ ):
2944
+ with open(tmp_path / "test.geojson", "wb") as f:
2945
+ write_dataframe(df.head(1), f)
2946
+
2947
+ # verify it fails for ZipFile
2948
+ with pytest.raises(
2949
+ NotImplementedError, match="writing to an open file handle is not yet supported"
2950
+ ):
2951
+ with ZipFile(tmp_path / "test.geojson.zip", "w") as z:
2952
+ with z.open("test.geojson", "w") as f:
2953
+ write_dataframe(df.head(1), f)
2954
+
2955
+ # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
2956
+ assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
2957
+
2958
+
2959
+ @pytest.mark.parametrize("ext", ["gpkg", "geojson"])
2960
+ def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
2961
+ """Verify that we write non-UTF data to the data source
2962
+
2963
+ IMPORTANT: this may not be valid for the data source and will likely render
2964
+ the files unusable in other tools, but the data should roundtrip successfully
2965
+ unless we disable writing using other encodings.
2966
+
2967
+ NOTE: FlatGeobuf driver cannot handle non-UTF data in GDAL >= 3.9
2968
+
2969
+ NOTE: pyarrow cannot handle non-UTF-8 characters in this way
2970
+ """
2971
+
2972
+ encoding, text = encoded_text
2973
+ output_path = tmp_path / f"test.{ext}"
2974
+
2975
+ df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
2976
+ write_dataframe(df, output_path, encoding=encoding)
2977
+
2978
+ # cannot open these files without specifying encoding
2979
+ with pytest.raises(UnicodeDecodeError):
2980
+ read_dataframe(output_path)
2981
+
2982
+ # must provide encoding to read these properly
2983
+ actual = read_dataframe(output_path, encoding=encoding)
2984
+ assert actual.columns[0] == text
2985
+ assert actual[text].values[0] == text
2986
+
2987
+
2988
+ @requires_pyarrow_api
2989
+ @pytest.mark.parametrize("ext", ["gpkg", "geojson"])
2990
+ def test_non_utf8_encoding_io_arrow_exception(tmp_path, ext, encoded_text):
2991
+ encoding, text = encoded_text
2992
+ output_path = tmp_path / f"test.{ext}"
2993
+
2994
+ df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
2995
+ write_dataframe(df, output_path, encoding=encoding)
2996
+
2997
+ # cannot open these files without specifying encoding
2998
+ with pytest.raises(UnicodeDecodeError):
2999
+ read_dataframe(output_path)
3000
+
3001
+ with pytest.raises(
3002
+ ValueError, match="non-UTF-8 encoding is not supported for Arrow"
3003
+ ):
3004
+ read_dataframe(output_path, encoding=encoding, use_arrow=True)
3005
+
3006
+
3007
+ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):
3008
+ encoding, text = encoded_text
3009
+
3010
+ output_path = tmp_path / "test.shp"
3011
+
3012
+ df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
3013
+ write_dataframe(df, output_path, encoding=encoding)
3014
+
3015
+ # NOTE: GDAL automatically creates a cpg file with the encoding name, which
3016
+ # means that if we read this without specifying the encoding it uses the
3017
+ # correct one
3018
+ actual = read_dataframe(output_path, use_arrow=use_arrow)
3019
+ assert actual.columns[0] == text
3020
+ assert actual[text].values[0] == text
3021
+
3022
+ # verify that if the cpg file is not present, the user-provided encoding must be used
3023
+ output_path.with_suffix(".cpg").unlink()
3024
+
3025
+ # We will assume ISO-8859-1, which is wrong
3026
+ miscoded = text.encode(encoding).decode("ISO-8859-1")
3027
+
3028
+ if use_arrow:
3029
+ # pyarrow cannot decode column name with incorrect encoding
3030
+ with pytest.raises(
3031
+ DataSourceError,
3032
+ match="The file being read is not encoded in UTF-8; please use_arrow=False",
3033
+ ):
3034
+ read_dataframe(output_path, use_arrow=True)
3035
+ else:
3036
+ bad = read_dataframe(output_path, use_arrow=False)
3037
+ assert bad.columns[0] == miscoded
3038
+ assert bad[miscoded].values[0] == miscoded
3039
+
3040
+ # If encoding is provided, that should yield correct text
3041
+ actual = read_dataframe(output_path, encoding=encoding, use_arrow=use_arrow)
3042
+ assert actual.columns[0] == text
3043
+ assert actual[text].values[0] == text
3044
+
3045
+ # if the ENCODING open option is provided, that should yield correct text
3046
+ actual = read_dataframe(output_path, use_arrow=use_arrow, ENCODING=encoding)
3047
+ assert actual.columns[0] == text
3048
+ assert actual[text].values[0] == text
3049
+
3050
+
3051
+ def test_encoding_read_option_collision_shapefile(naturalearth_lowres, use_arrow):
3052
+ """Providing both encoding parameter and ENCODING open option
3053
+ (even if blank) is not allowed."""
3054
+
3055
+ with pytest.raises(
3056
+ ValueError, match='cannot provide both encoding parameter and "ENCODING" option'
3057
+ ):
3058
+ read_dataframe(
3059
+ naturalearth_lowres, encoding="CP936", ENCODING="", use_arrow=use_arrow
3060
+ )
3061
+
3062
+
3063
+ def test_encoding_write_layer_option_collision_shapefile(tmp_path, encoded_text):
3064
+ """Providing both encoding parameter and ENCODING layer creation option
3065
+ (even if blank) is not allowed."""
3066
+ encoding, text = encoded_text
3067
+
3068
+ output_path = tmp_path / "test.shp"
3069
+ df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
3070
+
3071
+ with pytest.raises(
3072
+ ValueError,
3073
+ match=(
3074
+ 'cannot provide both encoding parameter and "ENCODING" layer creation '
3075
+ "option"
3076
+ ),
3077
+ ):
3078
+ write_dataframe(
3079
+ df, output_path, encoding=encoding, layer_options={"ENCODING": ""}
3080
+ )
3081
+
3082
+
3083
+ def test_non_utf8_encoding_shapefile_sql(tmp_path, use_arrow):
3084
+ encoding = "CP936"
3085
+
3086
+ output_path = tmp_path / "test.shp"
3087
+
3088
+ mandarin = "中文"
3089
+ df = gp.GeoDataFrame(
3090
+ {mandarin: mandarin, "geometry": [Point(0, 0)]}, crs="EPSG:4326"
3091
+ )
3092
+ write_dataframe(df, output_path, encoding=encoding)
3093
+
3094
+ actual = read_dataframe(
3095
+ output_path,
3096
+ sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
3097
+ use_arrow=use_arrow,
3098
+ )
3099
+ assert actual.columns[0] == mandarin
3100
+ assert actual[mandarin].values[0] == mandarin
3101
+
3102
+ actual = read_dataframe(
3103
+ output_path,
3104
+ sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
3105
+ encoding=encoding,
3106
+ use_arrow=use_arrow,
3107
+ )
3108
+ assert actual.columns[0] == mandarin
3109
+ assert actual[mandarin].values[0] == mandarin
3110
+
3111
+
3112
+ @pytest.mark.requires_arrow_write_api
3113
+ def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
3114
+ # confirm KML coordinates are written in lon, lat order even if CRS axis
3115
+ # specifies otherwise
3116
+ points = [Point(10, 20), Point(30, 40), Point(50, 60)]
3117
+ gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
3118
+ output_path = tmp_path / "test.kml"
3119
+ write_dataframe(
3120
+ gdf, output_path, layer="tmp_layer", driver="KML", use_arrow=use_arrow
3121
+ )
3122
+
3123
+ gdf_in = read_dataframe(output_path, use_arrow=use_arrow)
3124
+
3125
+ assert np.array_equal(gdf_in.geometry.values, points)
3126
+
3127
+
3128
+ @pytest.mark.requires_arrow_write_api
3129
+ @pytest.mark.skipif(
3130
+ "LIBKML" not in list_drivers(),
3131
+ reason="LIBKML driver is not available and is needed to append to .kml",
3132
+ )
3133
+ def test_write_kml_append(tmp_path, use_arrow):
3134
+ """Append features to an existing KML file.
3135
+
3136
+ Appending is only supported by the LIBKML driver, and the driver isn't
3137
+ included in the GDAL ubuntu-small images, so skip if not available.
3138
+ """
3139
+ points = [Point(10, 20), Point(30, 40), Point(50, 60)]
3140
+ gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
3141
+ output_path = tmp_path / "test.kml"
3142
+ write_dataframe(
3143
+ gdf, output_path, layer="tmp_layer", driver="KML", use_arrow=use_arrow
3144
+ )
3145
+
3146
+ # test appending to the existing file only if LIBKML is available
3147
+ # as GDAL appears to fall back on the LIBKML driver when appending.
3148
+ points_append = [Point(7, 8), Point(9, 10), Point(11, 12)]
3149
+ gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
3150
+
3151
+ write_dataframe(
3152
+ gdf_append,
3153
+ output_path,
3154
+ layer="tmp_layer",
3155
+ driver="KML",
3156
+ use_arrow=use_arrow,
3157
+ append=True,
3158
+ )
3159
+ # force_2d is used to only compare the xy dimensions of the geometry, as the LIBKML
3160
+ # driver always adds the z-dimension when the kml file is over-written.
3161
+ gdf_in_appended = read_dataframe(output_path, use_arrow=use_arrow, force_2d=True)
3162
+
3163
+ assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)
3164
+
3165
+
3166
+ @pytest.mark.requires_arrow_write_api
3167
+ def test_write_geojson_rfc7946_coordinates(tmp_path, use_arrow):
3168
+ points = [Point(10, 20), Point(30, 40), Point(50, 60)]
3169
+ gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
3170
+ output_path = tmp_path / "test.geojson"
3171
+ write_dataframe(
3172
+ gdf,
3173
+ output_path,
3174
+ layer="tmp_layer",
3175
+ driver="GeoJSON",
3176
+ RFC7946=True,
3177
+ use_arrow=use_arrow,
3178
+ )
3179
+
3180
+ gdf_in = read_dataframe(output_path, use_arrow=use_arrow)
3181
+
3182
+ assert np.array_equal(gdf_in.geometry.values, points)
3183
+
3184
+ # test appending to the existing file
3185
+
3186
+ points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
3187
+ gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
3188
+
3189
+ write_dataframe(
3190
+ gdf_append,
3191
+ output_path,
3192
+ layer="tmp_layer",
3193
+ driver="GeoJSON",
3194
+ RFC7946=True,
3195
+ use_arrow=use_arrow,
3196
+ append=True,
3197
+ )
3198
+
3199
+ gdf_in_appended = read_dataframe(output_path, use_arrow=use_arrow)
3200
+ assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)
3201
+
3202
+
3203
+ @pytest.mark.requires_arrow_api
3204
+ @pytest.mark.skipif(
3205
+ not GDAL_HAS_PARQUET_DRIVER, reason="Parquet driver is not available"
3206
+ )
3207
+ def test_parquet_driver(tmp_path, use_arrow):
3208
+ """
3209
+ Simple test verifying the Parquet driver works if available
3210
+ """
3211
+ gdf = gp.GeoDataFrame(
3212
+ {"col": [1, 2, 3], "geometry": [Point(0, 0), Point(1, 1), Point(2, 2)]},
3213
+ crs="EPSG:4326",
3214
+ )
3215
+ output_path = tmp_path / "test.parquet"
3216
+ write_dataframe(gdf, output_path, use_arrow=use_arrow)
3217
+ result = read_dataframe(output_path, use_arrow=use_arrow)
3218
+ assert_geodataframe_equal(result, gdf)