pyogrio 0.10.0__cp39-cp39-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pyogrio might be problematic.

Files changed (223)
  1. pyogrio/__init__.py +55 -0
  2. pyogrio/_compat.py +47 -0
  3. pyogrio/_env.py +59 -0
  4. pyogrio/_err.cpython-39-x86_64-linux-gnu.so +0 -0
  5. pyogrio/_geometry.cpython-39-x86_64-linux-gnu.so +0 -0
  6. pyogrio/_io.cpython-39-x86_64-linux-gnu.so +0 -0
  7. pyogrio/_ogr.cpython-39-x86_64-linux-gnu.so +0 -0
  8. pyogrio/_version.py +21 -0
  9. pyogrio/_vsi.cpython-39-x86_64-linux-gnu.so +0 -0
  10. pyogrio/core.py +386 -0
  11. pyogrio/errors.py +25 -0
  12. pyogrio/gdal_data/GDAL-targets-release.cmake +19 -0
  13. pyogrio/gdal_data/GDAL-targets.cmake +105 -0
  14. pyogrio/gdal_data/GDALConfig.cmake +24 -0
  15. pyogrio/gdal_data/GDALConfigVersion.cmake +85 -0
  16. pyogrio/gdal_data/GDALLogoBW.svg +138 -0
  17. pyogrio/gdal_data/GDALLogoColor.svg +126 -0
  18. pyogrio/gdal_data/GDALLogoGS.svg +126 -0
  19. pyogrio/gdal_data/LICENSE.TXT +467 -0
  20. pyogrio/gdal_data/MM_m_idofic.csv +321 -0
  21. pyogrio/gdal_data/copyright +467 -0
  22. pyogrio/gdal_data/cubewerx_extra.wkt +48 -0
  23. pyogrio/gdal_data/default.rsc +0 -0
  24. pyogrio/gdal_data/ecw_cs.wkt +1453 -0
  25. pyogrio/gdal_data/eedaconf.json +23 -0
  26. pyogrio/gdal_data/epsg.wkt +1 -0
  27. pyogrio/gdal_data/esri_StatePlane_extra.wkt +631 -0
  28. pyogrio/gdal_data/gdalicon.png +0 -0
  29. pyogrio/gdal_data/gdalinfo_output.schema.json +346 -0
  30. pyogrio/gdal_data/gdalmdiminfo_output.schema.json +321 -0
  31. pyogrio/gdal_data/gdaltileindex.xsd +269 -0
  32. pyogrio/gdal_data/gdalvrt.xsd +880 -0
  33. pyogrio/gdal_data/gfs.xsd +246 -0
  34. pyogrio/gdal_data/gml_registry.xml +117 -0
  35. pyogrio/gdal_data/gml_registry.xsd +66 -0
  36. pyogrio/gdal_data/grib2_center.csv +251 -0
  37. pyogrio/gdal_data/grib2_process.csv +102 -0
  38. pyogrio/gdal_data/grib2_subcenter.csv +63 -0
  39. pyogrio/gdal_data/grib2_table_4_2_0_0.csv +261 -0
  40. pyogrio/gdal_data/grib2_table_4_2_0_1.csv +261 -0
  41. pyogrio/gdal_data/grib2_table_4_2_0_13.csv +261 -0
  42. pyogrio/gdal_data/grib2_table_4_2_0_14.csv +261 -0
  43. pyogrio/gdal_data/grib2_table_4_2_0_15.csv +261 -0
  44. pyogrio/gdal_data/grib2_table_4_2_0_16.csv +261 -0
  45. pyogrio/gdal_data/grib2_table_4_2_0_17.csv +11 -0
  46. pyogrio/gdal_data/grib2_table_4_2_0_18.csv +261 -0
  47. pyogrio/gdal_data/grib2_table_4_2_0_19.csv +261 -0
  48. pyogrio/gdal_data/grib2_table_4_2_0_190.csv +261 -0
  49. pyogrio/gdal_data/grib2_table_4_2_0_191.csv +261 -0
  50. pyogrio/gdal_data/grib2_table_4_2_0_2.csv +261 -0
  51. pyogrio/gdal_data/grib2_table_4_2_0_20.csv +261 -0
  52. pyogrio/gdal_data/grib2_table_4_2_0_21.csv +261 -0
  53. pyogrio/gdal_data/grib2_table_4_2_0_3.csv +261 -0
  54. pyogrio/gdal_data/grib2_table_4_2_0_4.csv +261 -0
  55. pyogrio/gdal_data/grib2_table_4_2_0_5.csv +261 -0
  56. pyogrio/gdal_data/grib2_table_4_2_0_6.csv +261 -0
  57. pyogrio/gdal_data/grib2_table_4_2_0_7.csv +261 -0
  58. pyogrio/gdal_data/grib2_table_4_2_10_0.csv +261 -0
  59. pyogrio/gdal_data/grib2_table_4_2_10_1.csv +261 -0
  60. pyogrio/gdal_data/grib2_table_4_2_10_191.csv +261 -0
  61. pyogrio/gdal_data/grib2_table_4_2_10_2.csv +261 -0
  62. pyogrio/gdal_data/grib2_table_4_2_10_3.csv +261 -0
  63. pyogrio/gdal_data/grib2_table_4_2_10_4.csv +261 -0
  64. pyogrio/gdal_data/grib2_table_4_2_1_0.csv +261 -0
  65. pyogrio/gdal_data/grib2_table_4_2_1_1.csv +261 -0
  66. pyogrio/gdal_data/grib2_table_4_2_1_2.csv +261 -0
  67. pyogrio/gdal_data/grib2_table_4_2_20_0.csv +261 -0
  68. pyogrio/gdal_data/grib2_table_4_2_20_1.csv +261 -0
  69. pyogrio/gdal_data/grib2_table_4_2_20_2.csv +261 -0
  70. pyogrio/gdal_data/grib2_table_4_2_2_0.csv +261 -0
  71. pyogrio/gdal_data/grib2_table_4_2_2_3.csv +261 -0
  72. pyogrio/gdal_data/grib2_table_4_2_2_4.csv +261 -0
  73. pyogrio/gdal_data/grib2_table_4_2_2_5.csv +261 -0
  74. pyogrio/gdal_data/grib2_table_4_2_2_6.csv +261 -0
  75. pyogrio/gdal_data/grib2_table_4_2_3_0.csv +261 -0
  76. pyogrio/gdal_data/grib2_table_4_2_3_1.csv +261 -0
  77. pyogrio/gdal_data/grib2_table_4_2_3_2.csv +28 -0
  78. pyogrio/gdal_data/grib2_table_4_2_3_3.csv +8 -0
  79. pyogrio/gdal_data/grib2_table_4_2_3_4.csv +14 -0
  80. pyogrio/gdal_data/grib2_table_4_2_3_5.csv +11 -0
  81. pyogrio/gdal_data/grib2_table_4_2_3_6.csv +11 -0
  82. pyogrio/gdal_data/grib2_table_4_2_4_0.csv +261 -0
  83. pyogrio/gdal_data/grib2_table_4_2_4_1.csv +261 -0
  84. pyogrio/gdal_data/grib2_table_4_2_4_10.csv +261 -0
  85. pyogrio/gdal_data/grib2_table_4_2_4_2.csv +261 -0
  86. pyogrio/gdal_data/grib2_table_4_2_4_3.csv +261 -0
  87. pyogrio/gdal_data/grib2_table_4_2_4_4.csv +261 -0
  88. pyogrio/gdal_data/grib2_table_4_2_4_5.csv +261 -0
  89. pyogrio/gdal_data/grib2_table_4_2_4_6.csv +261 -0
  90. pyogrio/gdal_data/grib2_table_4_2_4_7.csv +261 -0
  91. pyogrio/gdal_data/grib2_table_4_2_4_8.csv +261 -0
  92. pyogrio/gdal_data/grib2_table_4_2_4_9.csv +261 -0
  93. pyogrio/gdal_data/grib2_table_4_2_local_Canada.csv +5 -0
  94. pyogrio/gdal_data/grib2_table_4_2_local_HPC.csv +2 -0
  95. pyogrio/gdal_data/grib2_table_4_2_local_MRMS.csv +175 -0
  96. pyogrio/gdal_data/grib2_table_4_2_local_NCEP.csv +401 -0
  97. pyogrio/gdal_data/grib2_table_4_2_local_NDFD.csv +38 -0
  98. pyogrio/gdal_data/grib2_table_4_2_local_index.csv +7 -0
  99. pyogrio/gdal_data/grib2_table_4_5.csv +261 -0
  100. pyogrio/gdal_data/grib2_table_versions.csv +3 -0
  101. pyogrio/gdal_data/gt_datum.csv +229 -0
  102. pyogrio/gdal_data/gt_ellips.csv +24 -0
  103. pyogrio/gdal_data/header.dxf +1124 -0
  104. pyogrio/gdal_data/inspire_cp_BasicPropertyUnit.gfs +57 -0
  105. pyogrio/gdal_data/inspire_cp_CadastralBoundary.gfs +60 -0
  106. pyogrio/gdal_data/inspire_cp_CadastralParcel.gfs +81 -0
  107. pyogrio/gdal_data/inspire_cp_CadastralZoning.gfs +161 -0
  108. pyogrio/gdal_data/jpfgdgml_AdmArea.gfs +59 -0
  109. pyogrio/gdal_data/jpfgdgml_AdmBdry.gfs +49 -0
  110. pyogrio/gdal_data/jpfgdgml_AdmPt.gfs +59 -0
  111. pyogrio/gdal_data/jpfgdgml_BldA.gfs +54 -0
  112. pyogrio/gdal_data/jpfgdgml_BldL.gfs +54 -0
  113. pyogrio/gdal_data/jpfgdgml_Cntr.gfs +54 -0
  114. pyogrio/gdal_data/jpfgdgml_CommBdry.gfs +49 -0
  115. pyogrio/gdal_data/jpfgdgml_CommPt.gfs +59 -0
  116. pyogrio/gdal_data/jpfgdgml_Cstline.gfs +54 -0
  117. pyogrio/gdal_data/jpfgdgml_ElevPt.gfs +54 -0
  118. pyogrio/gdal_data/jpfgdgml_GCP.gfs +94 -0
  119. pyogrio/gdal_data/jpfgdgml_LeveeEdge.gfs +49 -0
  120. pyogrio/gdal_data/jpfgdgml_RailCL.gfs +54 -0
  121. pyogrio/gdal_data/jpfgdgml_RdASL.gfs +44 -0
  122. pyogrio/gdal_data/jpfgdgml_RdArea.gfs +54 -0
  123. pyogrio/gdal_data/jpfgdgml_RdCompt.gfs +59 -0
  124. pyogrio/gdal_data/jpfgdgml_RdEdg.gfs +59 -0
  125. pyogrio/gdal_data/jpfgdgml_RdMgtBdry.gfs +49 -0
  126. pyogrio/gdal_data/jpfgdgml_RdSgmtA.gfs +59 -0
  127. pyogrio/gdal_data/jpfgdgml_RvrMgtBdry.gfs +49 -0
  128. pyogrio/gdal_data/jpfgdgml_SBAPt.gfs +49 -0
  129. pyogrio/gdal_data/jpfgdgml_SBArea.gfs +54 -0
  130. pyogrio/gdal_data/jpfgdgml_SBBdry.gfs +44 -0
  131. pyogrio/gdal_data/jpfgdgml_WA.gfs +54 -0
  132. pyogrio/gdal_data/jpfgdgml_WL.gfs +54 -0
  133. pyogrio/gdal_data/jpfgdgml_WStrA.gfs +54 -0
  134. pyogrio/gdal_data/jpfgdgml_WStrL.gfs +54 -0
  135. pyogrio/gdal_data/nitf_spec.xml +3306 -0
  136. pyogrio/gdal_data/nitf_spec.xsd +189 -0
  137. pyogrio/gdal_data/ogrinfo_output.schema.json +528 -0
  138. pyogrio/gdal_data/ogrvrt.xsd +546 -0
  139. pyogrio/gdal_data/osmconf.ini +132 -0
  140. pyogrio/gdal_data/ozi_datum.csv +131 -0
  141. pyogrio/gdal_data/ozi_ellips.csv +35 -0
  142. pyogrio/gdal_data/pci_datum.txt +530 -0
  143. pyogrio/gdal_data/pci_ellips.txt +129 -0
  144. pyogrio/gdal_data/pdfcomposition.xsd +721 -0
  145. pyogrio/gdal_data/pds4_template.xml +65 -0
  146. pyogrio/gdal_data/plscenesconf.json +1985 -0
  147. pyogrio/gdal_data/ruian_vf_ob_v1.gfs +1455 -0
  148. pyogrio/gdal_data/ruian_vf_st_uvoh_v1.gfs +86 -0
  149. pyogrio/gdal_data/ruian_vf_st_v1.gfs +1489 -0
  150. pyogrio/gdal_data/ruian_vf_v1.gfs +2126 -0
  151. pyogrio/gdal_data/s57agencies.csv +249 -0
  152. pyogrio/gdal_data/s57attributes.csv +484 -0
  153. pyogrio/gdal_data/s57expectedinput.csv +1008 -0
  154. pyogrio/gdal_data/s57objectclasses.csv +287 -0
  155. pyogrio/gdal_data/seed_2d.dgn +0 -0
  156. pyogrio/gdal_data/seed_3d.dgn +0 -0
  157. pyogrio/gdal_data/stateplane.csv +259 -0
  158. pyogrio/gdal_data/tms_LINZAntarticaMapTileGrid.json +190 -0
  159. pyogrio/gdal_data/tms_MapML_APSTILE.json +268 -0
  160. pyogrio/gdal_data/tms_MapML_CBMTILE.json +346 -0
  161. pyogrio/gdal_data/tms_NZTM2000.json +243 -0
  162. pyogrio/gdal_data/trailer.dxf +434 -0
  163. pyogrio/gdal_data/usage +4 -0
  164. pyogrio/gdal_data/vcpkg-cmake-wrapper.cmake +23 -0
  165. pyogrio/gdal_data/vcpkg.spdx.json +264 -0
  166. pyogrio/gdal_data/vcpkg_abi_info.txt +41 -0
  167. pyogrio/gdal_data/vdv452.xml +367 -0
  168. pyogrio/gdal_data/vdv452.xsd +63 -0
  169. pyogrio/gdal_data/vicar.json +164 -0
  170. pyogrio/geopandas.py +683 -0
  171. pyogrio/proj_data/CH +22 -0
  172. pyogrio/proj_data/GL27 +23 -0
  173. pyogrio/proj_data/ITRF2000 +24 -0
  174. pyogrio/proj_data/ITRF2008 +94 -0
  175. pyogrio/proj_data/ITRF2014 +55 -0
  176. pyogrio/proj_data/copyright +34 -0
  177. pyogrio/proj_data/deformation_model.schema.json +582 -0
  178. pyogrio/proj_data/nad.lst +142 -0
  179. pyogrio/proj_data/nad27 +810 -0
  180. pyogrio/proj_data/nad83 +745 -0
  181. pyogrio/proj_data/other.extra +53 -0
  182. pyogrio/proj_data/proj-config-version.cmake +44 -0
  183. pyogrio/proj_data/proj-config.cmake +79 -0
  184. pyogrio/proj_data/proj-targets-release.cmake +19 -0
  185. pyogrio/proj_data/proj-targets.cmake +107 -0
  186. pyogrio/proj_data/proj.db +0 -0
  187. pyogrio/proj_data/proj.ini +51 -0
  188. pyogrio/proj_data/proj4-targets-release.cmake +19 -0
  189. pyogrio/proj_data/proj4-targets.cmake +107 -0
  190. pyogrio/proj_data/projjson.schema.json +1174 -0
  191. pyogrio/proj_data/triangulation.schema.json +214 -0
  192. pyogrio/proj_data/usage +4 -0
  193. pyogrio/proj_data/vcpkg.spdx.json +198 -0
  194. pyogrio/proj_data/vcpkg_abi_info.txt +27 -0
  195. pyogrio/proj_data/world +214 -0
  196. pyogrio/raw.py +887 -0
  197. pyogrio/tests/__init__.py +0 -0
  198. pyogrio/tests/conftest.py +398 -0
  199. pyogrio/tests/fixtures/README.md +108 -0
  200. pyogrio/tests/fixtures/curve.gpkg +0 -0
  201. pyogrio/tests/fixtures/curvepolygon.gpkg +0 -0
  202. pyogrio/tests/fixtures/line_zm.gpkg +0 -0
  203. pyogrio/tests/fixtures/multisurface.gpkg +0 -0
  204. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.cpg +1 -0
  205. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf +0 -0
  206. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.prj +1 -0
  207. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp +0 -0
  208. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx +0 -0
  209. pyogrio/tests/fixtures/sample.osm.pbf +0 -0
  210. pyogrio/tests/fixtures/test_gpkg_nulls.gpkg +0 -0
  211. pyogrio/tests/test_arrow.py +1195 -0
  212. pyogrio/tests/test_core.py +678 -0
  213. pyogrio/tests/test_geopandas_io.py +2314 -0
  214. pyogrio/tests/test_path.py +364 -0
  215. pyogrio/tests/test_raw_io.py +1515 -0
  216. pyogrio/tests/test_util.py +56 -0
  217. pyogrio/util.py +247 -0
  218. pyogrio-0.10.0.dist-info/LICENSE +21 -0
  219. pyogrio-0.10.0.dist-info/METADATA +129 -0
  220. pyogrio-0.10.0.dist-info/RECORD +223 -0
  221. pyogrio-0.10.0.dist-info/WHEEL +5 -0
  222. pyogrio-0.10.0.dist-info/top_level.txt +1 -0
  223. pyogrio.libs/libgdal-44263852.so.35.3.9.1 +0 -0
pyogrio/tests/test_geopandas_io.py
@@ -0,0 +1,2314 @@
+import contextlib
+import locale
+import warnings
+from datetime import datetime
+from io import BytesIO
+from zipfile import ZipFile
+
+import numpy as np
+
+from pyogrio import (
+    __gdal_version__,
+    list_drivers,
+    list_layers,
+    read_info,
+    vsi_listtree,
+    vsi_unlink,
+)
+from pyogrio._compat import HAS_ARROW_WRITE_API, HAS_PYPROJ, PANDAS_GE_15
+from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
+from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe
+from pyogrio.raw import (
+    DRIVERS_NO_MIXED_DIMENSIONS,
+    DRIVERS_NO_MIXED_SINGLE_MULTI,
+)
+from pyogrio.tests.conftest import (
+    ALL_EXTS,
+    DRIVERS,
+    START_FID,
+    requires_arrow_write_api,
+    requires_gdal_geos,
+    requires_pyarrow_api,
+    requires_pyproj,
+)
+
+import pytest
+
+try:
+    import geopandas as gp
+    import pandas as pd
+    from geopandas.array import from_wkt
+
+    import shapely  # if geopandas is present, shapely is expected to be present
+    from shapely.geometry import Point
+
+    from geopandas.testing import assert_geodataframe_equal
+    from pandas.testing import (
+        assert_index_equal,
+        assert_series_equal,
+    )
+
+except ImportError:
+    pass
+
+
+pytest.importorskip("geopandas")
+
+
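+# Session-scoped fixture: every test that requests `use_arrow` runs twice, once
+# through the classic GDAL feature API (False) and once through the Arrow
+# stream (True); the True case is skipped when the pyarrow API is unavailable.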
+@pytest.fixture(
+    scope="session",
+    params=[
+        False,
+        pytest.param(True, marks=requires_pyarrow_api),
+    ],
+)
+def use_arrow(request):
+    return request.param
+
+
+@pytest.fixture(autouse=True)
+def skip_if_no_arrow_write_api(request):
+    # Automatically skip tests that run with use_arrow=True and require the Arrow
+    # write API (marked with `@pytest.mark.requires_arrow_write_api`) if it is not
+    # available.
+    use_arrow = (
+        request.getfixturevalue("use_arrow")
+        if "use_arrow" in request.fixturenames
+        else False
+    )
+    if (
+        use_arrow
+        and not HAS_ARROW_WRITE_API
+        and request.node.get_closest_marker("requires_arrow_write_api")
+    ):
+        pytest.skip("GDAL>=3.8 required for Arrow write API")
+
+
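+# SpatiaLite is an optional SQLite extension; running a trivial
+# "select spatialite_version()" query is a cheap probe for whether the
+# underlying GDAL build supports it.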
+def spatialite_available(path):
+    try:
+        _ = read_dataframe(
+            path, sql="select spatialite_version();", sql_dialect="SQLITE"
+        )
+        return True
+    except Exception:
+        return False
+
+
+@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
+def test_read_csv_encoding(tmp_path, encoding):
+    # Write csv test file. Depending on the os this will be written in a different
+    # encoding: for linux and macos this is utf-8, for windows it is cp1252.
+    csv_path = tmp_path / "test.csv"
+    with open(csv_path, "w", encoding=encoding) as csv:
+        csv.write("näme,city\n")
+        csv.write("Wilhelm Röntgen,Zürich\n")
+
+    # Read csv. The data should be read with the same default encoding as the csv
+    # file was written in, but should have been converted to utf-8 in the returned
+    # dataframe. Hence the asserts below, with strings in utf-8, should be OK.
+    df = read_dataframe(csv_path, encoding=encoding)
+
+    assert len(df) == 1
+    assert df.columns.tolist() == ["näme", "city"]
+    assert df.city.tolist() == ["Zürich"]
+    assert df.näme.tolist() == ["Wilhelm Röntgen"]
+
+
+@pytest.mark.skipif(
+    locale.getpreferredencoding().upper() == "UTF-8",
+    reason="test requires non-UTF-8 default platform",
+)
+def test_read_csv_platform_encoding(tmp_path):
+    """Verify that read defaults to platform encoding; only works on Windows (CP1252)."""
+    csv_path = tmp_path / "test.csv"
+    with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
+        csv.write("näme,city\n")
+        csv.write("Wilhelm Röntgen,Zürich\n")
+
+    df = read_dataframe(csv_path)
+
+    assert len(df) == 1
+    assert df.columns.tolist() == ["näme", "city"]
+    assert df.city.tolist() == ["Zürich"]
+    assert df.näme.tolist() == ["Wilhelm Röntgen"]
+
+
+def test_read_dataframe(naturalearth_lowres_all_ext):
+    df = read_dataframe(naturalearth_lowres_all_ext)
+
+    if HAS_PYPROJ:
+        assert df.crs == "EPSG:4326"
+    assert len(df) == 177
+    assert df.columns.tolist() == [
+        "pop_est",
+        "continent",
+        "name",
+        "iso_a3",
+        "gdp_md_est",
+        "geometry",
+    ]
+
+
+def test_read_dataframe_vsi(naturalearth_lowres_vsi, use_arrow):
+    df = read_dataframe(naturalearth_lowres_vsi[1], use_arrow=use_arrow)
+    assert len(df) == 177
+
+
+@pytest.mark.parametrize(
+    "columns, fid_as_index, exp_len", [(None, False, 3), ([], True, 3), ([], False, 0)]
+)
+def test_read_layer_without_geometry(
+    no_geometry_file, columns, fid_as_index, use_arrow, exp_len
+):
+    result = read_dataframe(
+        no_geometry_file,
+        columns=columns,
+        fid_as_index=fid_as_index,
+        use_arrow=use_arrow,
+    )
+    assert type(result) is pd.DataFrame
+    assert len(result) == exp_len
+
+
+@pytest.mark.parametrize(
+    "naturalearth_lowres, expected_ext",
+    [(".gpkg", ".gpkg"), (".shp", ".shp")],
+    indirect=["naturalearth_lowres"],
+)
+def test_fixture_naturalearth_lowres(naturalearth_lowres, expected_ext):
+    # Test the fixture with "indirect" parameter
+    assert naturalearth_lowres.suffix == expected_ext
+    df = read_dataframe(naturalearth_lowres)
+    assert len(df) == 177
+
+
+def test_read_no_geometry(naturalearth_lowres_all_ext, use_arrow):
+    df = read_dataframe(
+        naturalearth_lowres_all_ext, use_arrow=use_arrow, read_geometry=False
+    )
+    assert isinstance(df, pd.DataFrame)
+    assert not isinstance(df, gp.GeoDataFrame)
+
+
+def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres, use_arrow):
+    with pytest.raises(
+        ValueError,
+        match=(
+            "at least one of read_geometry or return_fids must be True or columns must "
+            "be None or non-empty"
+        ),
+    ):
+        _ = read_dataframe(
+            naturalearth_lowres,
+            columns=[],
+            read_geometry=False,
+            fid_as_index=False,
+            use_arrow=use_arrow,
+        )
+
+
+def test_read_force_2d(tmp_path, use_arrow):
+    filename = tmp_path / "test.gpkg"
+
+    # create a GPKG with 3D point values
+    expected = gp.GeoDataFrame(
+        geometry=[Point(0, 0, 0), Point(1, 1, 0)], crs="EPSG:4326"
+    )
+    write_dataframe(expected, filename)
+
+    df = read_dataframe(filename)
+    assert df.iloc[0].geometry.has_z
+
+    df = read_dataframe(
+        filename,
+        force_2d=True,
+        max_features=1,
+        use_arrow=use_arrow,
+    )
+    assert not df.iloc[0].geometry.has_z
+
+
+def test_read_layer(tmp_path, use_arrow):
+    filename = tmp_path / "test.gpkg"
+
+    # create a multilayer GPKG
+    expected1 = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
+    write_dataframe(
+        expected1,
+        filename,
+        layer="layer1",
+    )
+
+    expected2 = gp.GeoDataFrame(geometry=[Point(1, 1)], crs="EPSG:4326")
+    write_dataframe(expected2, filename, layer="layer2", append=True)
+
+    assert np.array_equal(
+        list_layers(filename), [["layer1", "Point"], ["layer2", "Point"]]
+    )
+
+    kwargs = {"use_arrow": use_arrow, "max_features": 1}
+
+    # The first layer is read by default, which will warn when there are multiple
+    # layers
+    with pytest.warns(UserWarning, match="More than one layer found"):
+        df = read_dataframe(filename, **kwargs)
+
+    assert_geodataframe_equal(df, expected1)
+
+    # Reading a specific layer by name should return that layer.
+    # Detected here by a known column.
+    df = read_dataframe(filename, layer="layer2", **kwargs)
+    assert_geodataframe_equal(df, expected2)
+
+    # Reading a specific layer by index should return that layer
+    df = read_dataframe(filename, layer=1, **kwargs)
+    assert_geodataframe_equal(df, expected2)
+
+
+def test_read_layer_invalid(naturalearth_lowres_all_ext, use_arrow):
+    with pytest.raises(DataLayerError, match="Layer 'wrong' could not be opened"):
+        read_dataframe(naturalearth_lowres_all_ext, layer="wrong", use_arrow=use_arrow)
+
+
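+# Datetime round-trip behavior differs between the two I/O paths: via Arrow,
+# timezone-aware columns come back as UTC, while the classic path keeps the
+# original UTC offset (GDAL stores offsets, not timezone names).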
+def test_read_datetime(datetime_file, use_arrow):
+    df = read_dataframe(datetime_file, use_arrow=use_arrow)
+    if PANDAS_GE_20:
+        # starting with pandas 2.0, it preserves the passed datetime resolution
+        assert df.col.dtype.name == "datetime64[ms]"
+    else:
+        assert df.col.dtype.name == "datetime64[ns]"
+
+
+@pytest.mark.filterwarnings("ignore: Non-conformant content for record 1 in column ")
+@pytest.mark.requires_arrow_write_api
+def test_read_datetime_tz(datetime_tz_file, tmp_path, use_arrow):
+    df = read_dataframe(datetime_tz_file)
+    # Make the index non-consecutive to test this case as well. Added for issue
+    # https://github.com/geopandas/pyogrio/issues/324
+    df = df.set_index(np.array([0, 2]))
+    raw_expected = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
+
+    if PANDAS_GE_20:
+        expected = pd.to_datetime(raw_expected, format="ISO8601").as_unit("ms")
+    else:
+        expected = pd.to_datetime(raw_expected)
+    expected = pd.Series(expected, name="datetime_col")
+    assert_series_equal(df.datetime_col, expected, check_index=False)
+    # test write and read round trips
+    fpath = tmp_path / "test.gpkg"
+    write_dataframe(df, fpath, use_arrow=use_arrow)
+    df_read = read_dataframe(fpath, use_arrow=use_arrow)
+    if use_arrow:
+        # with Arrow, the datetimes are always read as UTC
+        expected = expected.dt.tz_convert("UTC")
+    assert_series_equal(df_read.datetime_col, expected)
+
+
+@pytest.mark.filterwarnings(
+    "ignore: Non-conformant content for record 1 in column dates"
+)
+@pytest.mark.requires_arrow_write_api
+def test_write_datetime_mixed_offset(tmp_path, use_arrow):
+    # Australian Summer Time AEDT (GMT+11), Standard Time AEST (GMT+10)
+    dates = ["2023-01-01 11:00:01.111", "2023-06-01 10:00:01.111"]
+    naive_col = pd.Series(pd.to_datetime(dates), name="dates")
+    localised_col = naive_col.dt.tz_localize("Australia/Sydney")
+    utc_col = localised_col.dt.tz_convert("UTC")
+    if PANDAS_GE_20:
+        utc_col = utc_col.dt.as_unit("ms")
+
+    df = gp.GeoDataFrame(
+        {"dates": localised_col, "geometry": [Point(1, 1), Point(1, 1)]},
+        crs="EPSG:4326",
+    )
+    fpath = tmp_path / "test.gpkg"
+    write_dataframe(df, fpath, use_arrow=use_arrow)
+    result = read_dataframe(fpath, use_arrow=use_arrow)
+    # GDAL tz only encodes offsets, not timezones
+    # check multiple offsets are read as utc datetime instead of string values
+    assert_series_equal(result["dates"], utc_col)
+
+
+@pytest.mark.filterwarnings(
+    "ignore: Non-conformant content for record 1 in column dates"
+)
+@pytest.mark.requires_arrow_write_api
+def test_read_write_datetime_tz_with_nulls(tmp_path, use_arrow):
+    dates_raw = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00", pd.NaT]
+    if PANDAS_GE_20:
+        dates = pd.to_datetime(dates_raw, format="ISO8601").as_unit("ms")
+    else:
+        dates = pd.to_datetime(dates_raw)
+    df = gp.GeoDataFrame(
+        {"dates": dates, "geometry": [Point(1, 1), Point(1, 1), Point(1, 1)]},
+        crs="EPSG:4326",
+    )
+    fpath = tmp_path / "test.gpkg"
+    write_dataframe(df, fpath, use_arrow=use_arrow)
+    result = read_dataframe(fpath, use_arrow=use_arrow)
+    if use_arrow:
+        # with Arrow, the datetimes are always read as UTC
+        df["dates"] = df["dates"].dt.tz_convert("UTC")
+    assert_geodataframe_equal(df, result)
+
+
+def test_read_null_values(tmp_path, use_arrow):
+    filename = tmp_path / "test_null_values_no_geometry.gpkg"
+
+    # create a GPKG with no geometries and only null values
+    expected = pd.DataFrame({"col": [None, None]})
+    write_dataframe(expected, filename)
+
+    df = read_dataframe(filename, use_arrow=use_arrow, read_geometry=False)
+
+    # make sure that Null values are preserved
+    assert np.array_equal(df.col.values, expected.col.values)
+
+
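+# FID numbering is driver-specific (e.g. shapefiles start at 0, GeoPackage at
+# 1); START_FID from conftest records the first FID per extension so the
+# expected index can be shifted accordingly.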
+def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow):
+    kwargs = {"use_arrow": use_arrow, "skip_features": 2, "max_features": 2}
+
+    # default is to not set FIDs as index
+    df = read_dataframe(naturalearth_lowres_all_ext, **kwargs)
+    assert_index_equal(df.index, pd.RangeIndex(0, 2))
+
+    df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=False, **kwargs)
+    assert_index_equal(df.index, pd.RangeIndex(0, 2))
+
+    df = read_dataframe(
+        naturalearth_lowres_all_ext,
+        fid_as_index=True,
+        **kwargs,
+    )
+    fids_expected = pd.Index([2, 3], name="fid")
+    fids_expected += START_FID[naturalearth_lowres_all_ext.suffix]
+    assert_index_equal(df.index, fids_expected)
+
+
+def test_read_fid_as_index_only(naturalearth_lowres, use_arrow):
+    df = read_dataframe(
+        naturalearth_lowres,
+        columns=[],
+        read_geometry=False,
+        fid_as_index=True,
+        use_arrow=use_arrow,
+    )
+    assert df is not None
+    assert len(df) == 177
+    assert len(df.columns) == 0
+
+
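+# `where` takes an attribute filter: a SQL WHERE clause (without the WHERE
+# keyword) that GDAL applies before returning features.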
+def test_read_where(naturalearth_lowres_all_ext, use_arrow):
+    # empty filter should return full set of records
+    df = read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, where="")
+    assert len(df) == 177
+
+    # should return singular item
+    df = read_dataframe(
+        naturalearth_lowres_all_ext, use_arrow=use_arrow, where="iso_a3 = 'CAN'"
+    )
+    assert len(df) == 1
+    assert df.iloc[0].iso_a3 == "CAN"
+
+    df = read_dataframe(
+        naturalearth_lowres_all_ext,
+        use_arrow=use_arrow,
+        where="iso_a3 IN ('CAN', 'USA', 'MEX')",
+    )
+    assert len(df) == 3
+    assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0
+
+    # should return items within range
+    df = read_dataframe(
+        naturalearth_lowres_all_ext,
+        use_arrow=use_arrow,
+        where="POP_EST >= 10000000 AND POP_EST < 100000000",
+    )
+    assert len(df) == 75
+    assert df.pop_est.min() >= 10000000
+    assert df.pop_est.max() < 100000000
+
+    # should match no items
+    df = read_dataframe(
+        naturalearth_lowres_all_ext, use_arrow=use_arrow, where="ISO_A3 = 'INVALID'"
+    )
+    assert len(df) == 0
+
+
+def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
+    if use_arrow and naturalearth_lowres_all_ext.suffix == ".gpkg":
+        # https://github.com/OSGeo/gdal/issues/8492
+        request.node.add_marker(pytest.mark.xfail(reason="GDAL doesn't error for GPKG"))
+    with pytest.raises(ValueError, match="Invalid SQL"):
+        read_dataframe(
+            naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+        )
+
+
+def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
+    # column included in where is not also included in list of columns, which means
+    # GDAL will return no features
+    # NOTE: this behavior is inconsistent across drivers so only shapefiles are
+    # tested for this
+    df = read_dataframe(
+        naturalearth_lowres,
+        where=""" "iso_a3" = 'CAN' """,
+        columns=["name"],
+        use_arrow=use_arrow,
+    )
+
+    assert len(df) == 0
+
+
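+# bbox filters take (xmin, ymin, xmax, ymax) and return features whose bounding
+# box intersects it; bbox and mask are mutually exclusive.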
+@pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
+def test_read_bbox_invalid(naturalearth_lowres_all_ext, bbox, use_arrow):
+    with pytest.raises(ValueError, match="Invalid bbox"):
+        read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, bbox=bbox)
+
+
+@pytest.mark.parametrize(
+    "bbox,expected",
+    [
+        ((0, 0, 0.00001, 0.00001), []),
+        ((-85, 8, -80, 10), ["PAN", "CRI"]),
+        ((-104, 54, -105, 55), ["CAN"]),
+    ],
+)
+def test_read_bbox(naturalearth_lowres_all_ext, use_arrow, bbox, expected):
+    if (
+        use_arrow
+        and __gdal_version__ < (3, 8, 0)
+        and naturalearth_lowres_all_ext.suffix == ".gpkg"
+    ):
+        pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")
+
+    df = read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, bbox=bbox)
+
+    assert np.array_equal(df.iso_a3, expected)
+
+
+def test_read_bbox_sql(naturalearth_lowres_all_ext, use_arrow):
+    df = read_dataframe(
+        naturalearth_lowres_all_ext,
+        use_arrow=use_arrow,
+        bbox=(-180, 50, -100, 90),
+        sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
+    )
+    assert len(df) == 1
+    assert np.array_equal(df.iso_a3, ["CAN"])
+
+
+def test_read_bbox_where(naturalearth_lowres_all_ext, use_arrow):
+    df = read_dataframe(
+        naturalearth_lowres_all_ext,
+        use_arrow=use_arrow,
+        bbox=(-180, 50, -100, 90),
+        where="iso_a3 not in ('USA', 'RUS')",
+    )
+    assert len(df) == 1
+    assert np.array_equal(df.iso_a3, ["CAN"])
+
+
+@pytest.mark.parametrize(
+    "mask",
+    [
+        {"type": "Point", "coordinates": [0, 0]},
+        '{"type": "Point", "coordinates": [0, 0]}',
+        "invalid",
+    ],
+)
+def test_read_mask_invalid(naturalearth_lowres, use_arrow, mask):
+    with pytest.raises(ValueError, match="'mask' parameter must be a Shapely geometry"):
+        read_dataframe(naturalearth_lowres, use_arrow=use_arrow, mask=mask)
+
+
+def test_read_bbox_mask_invalid(naturalearth_lowres, use_arrow):
+    with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
+        read_dataframe(
+            naturalearth_lowres,
+            use_arrow=use_arrow,
+            bbox=(-85, 8, -80, 10),
+            mask=shapely.Point(-105, 55),
+        )
+
+
+@pytest.mark.parametrize(
+    "mask,expected",
+    [
+        (shapely.Point(-105, 55), ["CAN"]),
+        (shapely.box(-85, 8, -80, 10), ["PAN", "CRI"]),
+        (
+            shapely.Polygon(
+                (
+                    [6.101929483362767, 50.97085041206964],
+                    [5.773001596839322, 50.90661120482673],
+                    [5.593156133704326, 50.642648747710325],
+                    [6.059271089606312, 50.686051894002475],
+                    [6.374064065737485, 50.851481340346965],
+                    [6.101929483362767, 50.97085041206964],
+                )
+            ),
+            ["DEU", "BEL", "NLD"],
+        ),
+        (
+            shapely.GeometryCollection(
+                [shapely.Point(-7.7, 53), shapely.box(-85, 8, -80, 10)]
+            ),
+            ["PAN", "CRI", "IRL"],
+        ),
+    ],
+)
+def test_read_mask(
+    naturalearth_lowres_all_ext,
+    use_arrow,
+    mask,
+    expected,
+):
+    if (
+        use_arrow
+        and __gdal_version__ < (3, 8, 0)
+        and naturalearth_lowres_all_ext.suffix == ".gpkg"
+    ):
+        pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")
+
+    df = read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, mask=mask)
+
+    assert len(df) == len(expected)
+    assert np.array_equal(df.iso_a3, expected)
+
+
+def test_read_mask_sql(naturalearth_lowres_all_ext, use_arrow):
+    df = read_dataframe(
+        naturalearth_lowres_all_ext,
+        use_arrow=use_arrow,
+        mask=shapely.box(-180, 50, -100, 90),
+        sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
+    )
+    assert len(df) == 1
+    assert np.array_equal(df.iso_a3, ["CAN"])
+
+
+def test_read_mask_where(naturalearth_lowres_all_ext, use_arrow):
+    df = read_dataframe(
+        naturalearth_lowres_all_ext,
+        use_arrow=use_arrow,
+        mask=shapely.box(-180, 50, -100, 90),
+        where="iso_a3 not in ('USA', 'RUS')",
+    )
+    assert len(df) == 1
+    assert np.array_equal(df.iso_a3, ["CAN"])
+
+
+@pytest.mark.parametrize("fids", [[1, 5, 10], np.array([1, 5, 10], dtype=np.int64)])
+def test_read_fids(naturalearth_lowres_all_ext, fids, use_arrow):
+    # ensure keyword is properly passed through
+    df = read_dataframe(
+        naturalearth_lowres_all_ext, fids=fids, fid_as_index=True, use_arrow=use_arrow
+    )
+    assert len(df) == 3
+    assert np.array_equal(fids, df.index.values)
+
+
+@requires_pyarrow_api
+def test_read_fids_arrow_max_exception(naturalearth_lowres):
+    # The maximum number at the time of writing is 4997 for "OGRSQL"; for
+    # SQLite-based formats like GeoPackage there is no limit.
+    nb_fids = 4998
+    fids = range(nb_fids)
+    with pytest.raises(ValueError, match=f"error applying filter for {nb_fids} fids"):
+        _ = read_dataframe(naturalearth_lowres, fids=fids, use_arrow=True)
+
+
+@requires_pyarrow_api
+@pytest.mark.skipif(
+    __gdal_version__ >= (3, 8, 0), reason="GDAL >= 3.8.0 does not need to warn"
+)
+def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext):
+    # A warning should be given for old GDAL versions, except for some file formats.
+    if naturalearth_lowres_all_ext.suffix not in [".gpkg", ".geojson"]:
+        handler = pytest.warns(
+            UserWarning,
+            match="Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow",
+        )
+    else:
+        handler = contextlib.nullcontext()
+
+    with handler:
+        df = read_dataframe(naturalearth_lowres_all_ext, fids=[22], use_arrow=True)
+        assert len(df) == 1
+
+
+def test_read_fids_force_2d(tmp_path):
+    filename = tmp_path / "test.gpkg"
+
+    # create a GPKG with 3D point values
+    expected = gp.GeoDataFrame(
+        geometry=[Point(0, 0, 0), Point(1, 1, 0)], crs="EPSG:4326"
+    )
+    write_dataframe(expected, filename)
+
+    df = read_dataframe(filename, fids=[1])
+    assert_geodataframe_equal(df, expected.iloc[:1])
+
+    df = read_dataframe(filename, force_2d=True, fids=[1])
+    assert np.array_equal(
+        df.geometry.values, shapely.force_2d(expected.iloc[:1].geometry.values)
+    )
+
+
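+# skip_features/max_features page through a layer: the tests below compare a
+# paged read against slicing a full read of the same file.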
+@pytest.mark.parametrize("skip_features", [10, 200])
+def test_read_skip_features(naturalearth_lowres_all_ext, use_arrow, skip_features):
+    ext = naturalearth_lowres_all_ext.suffix
+    expected = (
+        read_dataframe(naturalearth_lowres_all_ext)
+        .iloc[skip_features:]
+        .reset_index(drop=True)
+    )
+
+    df = read_dataframe(
+        naturalearth_lowres_all_ext, skip_features=skip_features, use_arrow=use_arrow
+    )
+    assert len(df) == len(expected)
+
+    # Coordinates are not precisely equal when written to JSON
+    # dtypes do not necessarily round-trip precisely through JSON
+    is_json = ext in [".geojson", ".geojsonl"]
+    # In .geojsonl the vertices are reordered, so normalize
+    is_jsons = ext == ".geojsonl"
+
+    assert_geodataframe_equal(
+        df,
+        expected,
+        check_less_precise=is_json,
+        check_index_type=False,
+        check_dtype=not is_json,
+        normalize=is_jsons,
+    )
+
+
+def test_read_negative_skip_features(naturalearth_lowres, use_arrow):
+    with pytest.raises(ValueError, match="'skip_features' must be >= 0"):
+        read_dataframe(naturalearth_lowres, skip_features=-1, use_arrow=use_arrow)
+
+
+@pytest.mark.parametrize("max_features", [10, 100])
+def test_read_max_features(naturalearth_lowres_all_ext, use_arrow, max_features):
+    ext = naturalearth_lowres_all_ext.suffix
+    expected = read_dataframe(naturalearth_lowres_all_ext).iloc[:max_features]
+    df = read_dataframe(
+        naturalearth_lowres_all_ext, max_features=max_features, use_arrow=use_arrow
+    )
+
+    assert len(df) == len(expected)
+
+    # Coordinates are not precisely equal when written to JSON
+    # dtypes do not necessarily round-trip precisely through JSON
+    is_json = ext in [".geojson", ".geojsonl"]
+    # In .geojsonl the vertices are reordered, so normalize
+    is_jsons = ext == ".geojsonl"
+
+    assert_geodataframe_equal(
+        df,
+        expected,
+        check_less_precise=is_json,
+        check_index_type=False,
+        check_dtype=not is_json,
+        normalize=is_jsons,
+    )
+
+
+def test_read_negative_max_features(naturalearth_lowres, use_arrow):
+    with pytest.raises(ValueError, match="'max_features' must be >= 0"):
+        read_dataframe(naturalearth_lowres, max_features=-1, use_arrow=use_arrow)
+
+
+def test_read_non_existent_file(use_arrow):
+    # ensure consistent error type / message from GDAL
+    with pytest.raises(DataSourceError, match="No such file or directory"):
+        read_dataframe("non-existent.shp", use_arrow=use_arrow)
+
+    with pytest.raises(DataSourceError, match="does not exist in the file system"):
+        read_dataframe("/vsizip/non-existent.zip", use_arrow=use_arrow)
+
+    with pytest.raises(DataSourceError, match="does not exist in the file system"):
+        read_dataframe("zip:///non-existent.zip", use_arrow=use_arrow)
+
+
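+# `sql` runs a full query against the datasource, and the dialect matters:
+# OGRSQL is GDAL's built-in dialect, while SQLITE/INDIRECT_SQLITE route the
+# statement through SQLite and make SpatiaLite functions available.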
+def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
+    # The geometry column cannot be specified when using the
+    # default OGRSQL dialect but is returned nonetheless, so 4 columns.
+    sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
+    df = read_dataframe(
+        naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+    )
+    assert len(df.columns) == 4
+    assert len(df) == 177
+
+    # Should return single row
+    sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
+    df = read_dataframe(
+        naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+    )
+    assert len(df) == 1
+    assert len(df.columns) == 6
+    assert df.iloc[0].iso_a3 == "CAN"
+
+    sql = """SELECT *
+             FROM naturalearth_lowres
+             WHERE iso_a3 IN ('CAN', 'USA', 'MEX')"""
+    df = read_dataframe(
+        naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+    )
+    assert len(df.columns) == 6
+    assert len(df) == 3
+    assert df.iso_a3.tolist() == ["CAN", "USA", "MEX"]
+
+    sql = """SELECT *
+             FROM naturalearth_lowres
+             WHERE iso_a3 IN ('CAN', 'USA', 'MEX')
+             ORDER BY name"""
+    df = read_dataframe(
+        naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+    )
+    assert len(df.columns) == 6
+    assert len(df) == 3
+    assert df.iso_a3.tolist() == ["CAN", "MEX", "USA"]
+
+    # Should return items within range.
+    sql = """SELECT *
+             FROM naturalearth_lowres
+             WHERE POP_EST >= 10000000 AND POP_EST < 100000000"""
+    df = read_dataframe(
+        naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+    )
+    assert len(df) == 75
+    assert len(df.columns) == 6
+    assert df.pop_est.min() >= 10000000
+    assert df.pop_est.max() < 100000000
+
+    # Should match no items.
+    sql = "SELECT * FROM naturalearth_lowres WHERE ISO_A3 = 'INVALID'"
+    df = read_dataframe(
+        naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+    )
+    assert len(df) == 0
+
+
+def test_read_sql_invalid(naturalearth_lowres_all_ext, use_arrow):
+    if naturalearth_lowres_all_ext.suffix == ".gpkg":
+        with pytest.raises(Exception, match="In ExecuteSQL().*"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
+            )
+    else:
+        with pytest.raises(Exception, match="SQL Expression Parsing Error"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
+            )
+
+    with pytest.raises(
+        ValueError, match="'sql' parameter cannot be combined with 'layer'"
+    ):
+        read_dataframe(
+            naturalearth_lowres_all_ext,
+            sql="whatever",
+            layer="invalid",
+            use_arrow=use_arrow,
+        )
+
+
+def test_read_sql_columns_where(naturalearth_lowres_all_ext, use_arrow):
+    sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
+    df = read_dataframe(
+        naturalearth_lowres_all_ext,
+        sql=sql,
+        sql_dialect="OGRSQL",
+        columns=["iso_a3_renamed", "name"],
+        where="iso_a3_renamed IN ('CAN', 'USA', 'MEX')",
+        use_arrow=use_arrow,
+    )
+    assert len(df.columns) == 3
+    assert len(df) == 3
+    assert df.iso_a3_renamed.tolist() == ["CAN", "USA", "MEX"]
+
+
+def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext, use_arrow):
+    sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
+    df = read_dataframe(
+        naturalearth_lowres_all_ext,
+        sql=sql,
+        sql_dialect="OGRSQL",
+        columns=["iso_a3_renamed", "name"],
+        where="iso_a3_renamed IN ('CRI', 'PAN')",
+        bbox=(-85, 8, -80, 10),
+        use_arrow=use_arrow,
+    )
+    assert len(df.columns) == 3
+    assert len(df) == 2
+    assert df.iso_a3_renamed.tolist() == ["PAN", "CRI"]
+
+
+def test_read_sql_skip_max(naturalearth_lowres_all_ext, use_arrow):
+    sql = """SELECT *
+             FROM naturalearth_lowres
+             WHERE iso_a3 IN ('CAN', 'MEX', 'USA')
+             ORDER BY name"""
+    df = read_dataframe(
+        naturalearth_lowres_all_ext,
+        sql=sql,
+        skip_features=1,
+        max_features=1,
+        sql_dialect="OGRSQL",
+        use_arrow=use_arrow,
+    )
+    assert len(df.columns) == 6
+    assert len(df) == 1
+    assert df.iso_a3.tolist() == ["MEX"]
+
+    sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
+    df = read_dataframe(
+        naturalearth_lowres_all_ext,
+        sql=sql,
+        max_features=3,
+        sql_dialect="OGRSQL",
+        use_arrow=use_arrow,
+    )
+    assert len(df) == 1
+
+    sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
+    df = read_dataframe(
+        naturalearth_lowres_all_ext,
+        sql=sql,
+        sql_dialect="OGRSQL",
+        skip_features=1,
+        use_arrow=use_arrow,
+    )
+    assert len(df) == 0
+
+
+@requires_gdal_geos
+@pytest.mark.parametrize(
+    "naturalearth_lowres",
+    [ext for ext in ALL_EXTS if ext != ".gpkg"],
+    indirect=["naturalearth_lowres"],
+)
+def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres, use_arrow):
+    # Should return singular item
+    sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
+    df = read_dataframe(
+        naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
+    )
+    assert len(df) == 1
+    assert len(df.columns) == 6
+    assert df.iloc[0].iso_a3 == "CAN"
+    area_canada = df.iloc[0].geometry.area
+
+    # Use spatialite function
+    sql = """SELECT ST_Buffer(geometry, 5) AS geometry, name, pop_est, iso_a3
+             FROM naturalearth_lowres
+             WHERE ISO_A3 = 'CAN'"""
+    df = read_dataframe(
+        naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
+    )
+    assert len(df) == 1
+    assert len(df.columns) == 4
+    assert df.iloc[0].geometry.area > area_canada
+
+
+@requires_gdal_geos
+@pytest.mark.parametrize(
+    "naturalearth_lowres", [".gpkg"], indirect=["naturalearth_lowres"]
+)
+def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
+    # "INDIRECT_SQL" prohibits GDAL from passing the SQL statement to sqlite.
+    # Because the statement is processed within GDAL it is possible to use
+    # spatialite functions even if sqlite isn't built with spatialite support.
+    sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
+    df = read_dataframe(
+        naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
+    )
+    assert len(df) == 1
+    assert len(df.columns) == 6
+    assert df.iloc[0].iso_a3 == "CAN"
+    area_canada = df.iloc[0].geometry.area
+
+    # Use spatialite function
+    sql = """SELECT ST_Buffer(geom, 5) AS geometry, name, pop_est, iso_a3
+             FROM naturalearth_lowres
+             WHERE ISO_A3 = 'CAN'"""
+    df = read_dataframe(
+        naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
+    )
+    assert len(df) == 1
+    assert len(df.columns) == 4
+    assert df.iloc[0].geometry.area > area_canada
+
+
+@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
+def test_write_csv_encoding(tmp_path, encoding):
+    """Test if write_dataframe uses the default encoding correctly."""
+    # Write csv test file. Depending on the os this will be written in a different
+    # encoding: for linux and macos this is utf-8, for windows it is cp1252.
+    csv_path = tmp_path / "test.csv"
+
+    with open(csv_path, "w", encoding=encoding) as csv:
+        csv.write("näme,city\n")
+        csv.write("Wilhelm Röntgen,Zürich\n")
+
+    # Write a csv file with the same data using write_dataframe. It should use the
+    # same encoding as above.
+    df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
+    csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
+    write_dataframe(df, csv_pyogrio_path, encoding=encoding)
+
+    # Check that the text files written both ways can be read again and give the
+    # same result.
+    with open(csv_path, encoding=encoding) as csv:
+        csv_str = csv.read()
+    with open(csv_pyogrio_path, encoding=encoding) as csv_pyogrio:
+        csv_pyogrio_str = csv_pyogrio.read()
+    assert csv_str == csv_pyogrio_str
+
+    # Check that the files are binary identical, to be 100% sure they were written
+    # with the same encoding.
+    with open(csv_path, "rb") as csv:
+        csv_bytes = csv.read()
+    with open(csv_pyogrio_path, "rb") as csv_pyogrio:
+        csv_pyogrio_bytes = csv_pyogrio.read()
+    assert csv_bytes == csv_pyogrio_bytes
+
+
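+# The write tests below round-trip data through write_dataframe/read_dataframe
+# and compare the result against the original frame.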
+@pytest.mark.parametrize("ext", ALL_EXTS)
+@pytest.mark.requires_arrow_write_api
+def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
+    input_gdf = read_dataframe(naturalearth_lowres)
+    output_path = tmp_path / f"test{ext}"
+
+    if ext == ".fgb":
+        # For .fgb, spatial_index=False to avoid the rows being reordered
+        write_dataframe(
+            input_gdf, output_path, use_arrow=use_arrow, spatial_index=False
+        )
+    else:
+        write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
+
+    assert output_path.exists()
+    result_gdf = read_dataframe(output_path)
+
+    geometry_types = result_gdf.geometry.type.unique()
+    if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
+        assert list(geometry_types) == ["MultiPolygon"]
+    else:
+        assert set(geometry_types) == {"MultiPolygon", "Polygon"}
+
+    # Coordinates are not precisely equal when written to JSON
+    # dtypes do not necessarily round-trip precisely through JSON
+    is_json = ext in [".geojson", ".geojsonl"]
+    # In .geojsonl the vertices are reordered, so normalize
+    is_jsons = ext == ".geojsonl"
+
+    assert_geodataframe_equal(
+        result_gdf,
+        input_gdf,
+        check_less_precise=is_json,
+        check_index_type=False,
+        check_dtype=not is_json,
+        normalize=is_jsons,
+    )
+
+
+@pytest.mark.filterwarnings("ignore:.*No SRS set on layer.*")
+@pytest.mark.parametrize("write_geodf", [True, False])
+@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS + [".xlsx"] if ext != ".fgb"])
+@pytest.mark.requires_arrow_write_api
+def test_write_dataframe_no_geom(
+    request, tmp_path, naturalearth_lowres, write_geodf, ext, use_arrow
+):
+    """Test writing a (geo)dataframe without a geometry column.
+
+    FlatGeobuf (.fgb) doesn't seem to support this, and just writes an empty file.
+    """
+    # Prepare test data
+    input_df = read_dataframe(naturalearth_lowres, read_geometry=False)
+    if write_geodf:
+        input_df = gp.GeoDataFrame(input_df)
+
+    output_path = tmp_path / f"test{ext}"
+
+    # A shapefile without geometry column results in only a .dbf file.
+    if ext == ".shp":
+        output_path = output_path.with_suffix(".dbf")
+
+    # Determine driver
+    driver = DRIVERS[ext] if ext != ".xlsx" else "XLSX"
+
+    write_dataframe(input_df, output_path, use_arrow=use_arrow, driver=driver)
+
+    assert output_path.exists()
+    result_df = read_dataframe(output_path)
+
+    assert isinstance(result_df, pd.DataFrame)
+
+    # some dtypes do not round-trip precisely through these file types
+    check_dtype = ext not in [".geojson", ".geojsonl", ".xlsx"]
+
+    if ext in [".gpkg", ".shp", ".xlsx"]:
+        # These file types return a DataFrame when read.
+        assert not isinstance(result_df, gp.GeoDataFrame)
+        if isinstance(input_df, gp.GeoDataFrame):
+            input_df = pd.DataFrame(input_df)
+
+        pd.testing.assert_frame_equal(
+            result_df, input_df, check_index_type=False, check_dtype=check_dtype
+        )
+    else:
+        # These file types return a GeoDataFrame with None Geometries when read.
+        input_none_geom_gdf = gp.GeoDataFrame(
+            input_df, geometry=np.repeat(None, len(input_df)), crs=4326
+        )
+        assert_geodataframe_equal(
+            result_df,
+            input_none_geom_gdf,
+            check_index_type=False,
+            check_dtype=check_dtype,
+        )
+
+
+@pytest.mark.requires_arrow_write_api
+def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):
+    # dataframe writing ignores the index
+    input_gdf = read_dataframe(naturalearth_lowres)
+    input_gdf = input_gdf.set_index("iso_a3")
+
+    output_path = tmp_path / "test.shp"
+    write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
+
+    result_gdf = read_dataframe(output_path)
+    assert isinstance(result_gdf.index, pd.RangeIndex)
+    assert_geodataframe_equal(result_gdf, input_gdf.reset_index(drop=True))
+
+
+@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
+@pytest.mark.requires_arrow_write_api
+def test_write_empty_dataframe(tmp_path, ext, use_arrow):
+    expected = gp.GeoDataFrame(geometry=[], crs=4326)
+
+    filename = tmp_path / f"test{ext}"
+    write_dataframe(expected, filename, use_arrow=use_arrow)
+
+    assert filename.exists()
+    df = read_dataframe(filename)
+    assert_geodataframe_equal(df, expected)
+
+
+def test_write_empty_geometry(tmp_path):
+    expected = gp.GeoDataFrame({"x": [0]}, geometry=from_wkt(["POINT EMPTY"]), crs=4326)
+    filename = tmp_path / "test.gpkg"
+
+    # Check that no warning is raised with GeoSeries.notna()
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", UserWarning)
+        if not HAS_PYPROJ:
+            warnings.filterwarnings("ignore", message="'crs' was not provided.")
+        write_dataframe(expected, filename)
+    assert filename.exists()
+
+    # Xref GH-436: round-tripping possible with GPKG but not others
+    df = read_dataframe(filename)
+    assert_geodataframe_equal(df, expected)
+
+
+@pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
+@pytest.mark.requires_arrow_write_api
+def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
+    # Writing an empty dataframe to .geojsons or .geojsonl logically results in a
+    # 0 byte file, but gdal isn't able to read those again at the time of writing.
+    # Issue logged here: https://github.com/geopandas/pyogrio/issues/94
+    expected = gp.GeoDataFrame(geometry=[], crs=4326)
+
+    filename = tmp_path / f"test{ext}"
+    write_dataframe(expected, filename, use_arrow=use_arrow)
+
+    assert filename.exists()
+    with pytest.raises(
+        Exception, match=".* not recognized as( being in)? a supported file format."
+    ):
+        _ = read_dataframe(filename, use_arrow=use_arrow)
+
+
+@pytest.mark.requires_arrow_write_api
+def test_write_dataframe_gpkg_multiple_layers(tmp_path, naturalearth_lowres, use_arrow):
+    input_gdf = read_dataframe(naturalearth_lowres)
+    filename = tmp_path / "test.gpkg"
+
+    write_dataframe(
+        input_gdf,
+        filename,
+        layer="first",
+        promote_to_multi=True,
+        use_arrow=use_arrow,
+    )
+
+    assert filename.exists()
+    assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])
+
+    write_dataframe(
+        input_gdf,
+        filename,
+        layer="second",
+        promote_to_multi=True,
+        use_arrow=use_arrow,
+    )
+    assert np.array_equal(
+        list_layers(filename),
+        [["first", "MultiPolygon"], ["second", "MultiPolygon"]],
+    )
+
+
+@pytest.mark.parametrize("ext", ALL_EXTS)
+@pytest.mark.requires_arrow_write_api
+def test_write_dataframe_append(request, tmp_path, naturalearth_lowres, ext, use_arrow):
+    if ext == ".fgb" and __gdal_version__ <= (3, 5, 0):
+        pytest.skip("Append to FlatGeobuf fails for GDAL <= 3.5.0")
+
+    if ext in (".geojsonl", ".geojsons") and __gdal_version__ <= (3, 6, 0):
+        pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")
+
+    if use_arrow and ext.startswith(".geojson"):
+        # Bug in GDAL when appending int64 to GeoJSON
+        # (https://github.com/OSGeo/gdal/issues/9792)
+        request.node.add_marker(
+            pytest.mark.xfail(reason="Bugs with append when writing Arrow to GeoJSON")
+        )
+
+    input_gdf = read_dataframe(naturalearth_lowres)
+    filename = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, filename, use_arrow=use_arrow)
+
+    assert filename.exists()
+    assert len(read_dataframe(filename)) == 177
+
+    write_dataframe(input_gdf, filename, use_arrow=use_arrow, append=True)
+    assert len(read_dataframe(filename)) == 354
+
+
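+# Unknown keyword arguments to write_dataframe are passed to GDAL as creation
+# options; for shapefiles, SPATIAL_INDEX=YES produces a .qix sidecar file.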
+@pytest.mark.parametrize("spatial_index", [False, True])
+@pytest.mark.requires_arrow_write_api
+def test_write_dataframe_gdal_options(
+    tmp_path, naturalearth_lowres, spatial_index, use_arrow
+):
+    df = read_dataframe(naturalearth_lowres)
+
+    outfilename1 = tmp_path / "test1.shp"
+    write_dataframe(
+        df,
+        outfilename1,
+        use_arrow=use_arrow,
+        SPATIAL_INDEX="YES" if spatial_index else "NO",
+    )
+    assert outfilename1.exists() is True
+    index_filename1 = tmp_path / "test1.qix"
+    assert index_filename1.exists() is spatial_index
+
+    # using explicit layer_options instead
+    outfilename2 = tmp_path / "test2.shp"
+    write_dataframe(
+        df,
+        outfilename2,
+        use_arrow=use_arrow,
+        layer_options={"spatial_index": spatial_index},
+    )
+    assert outfilename2.exists() is True
+    index_filename2 = tmp_path / "test2.qix"
+    assert index_filename2.exists() is spatial_index
+
+
+@pytest.mark.requires_arrow_write_api
+def test_write_dataframe_gdal_options_unknown(tmp_path, naturalearth_lowres, use_arrow):
+    df = read_dataframe(naturalearth_lowres)
+
+    # geojson has no spatial index, so passing keyword should raise
+    outfilename = tmp_path / "test.geojson"
+    with pytest.raises(ValueError, match="unrecognized option 'SPATIAL_INDEX'"):
+        write_dataframe(df, outfilename, use_arrow=use_arrow, spatial_index=True)
+
+
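+# A GeoPackage is a SQLite database, so its internal tables can be inspected
+# directly with the sqlite3 stdlib module.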
+def _get_gpkg_table_names(path):
+    import sqlite3
+
+    con = sqlite3.connect(path)
+    cursor = con.cursor()
+    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+    result = cursor.fetchall()
+    return [res[0] for res in result]
+
+
+@pytest.mark.requires_arrow_write_api
+def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres, use_arrow):
+    df = read_dataframe(naturalearth_lowres)
+
+    test_default_filename = tmp_path / "test_default.gpkg"
+    write_dataframe(df, test_default_filename, use_arrow=use_arrow)
+    assert "gpkg_ogr_contents" in _get_gpkg_table_names(test_default_filename)
+
+    test_no_contents_filename = tmp_path / "test_no_contents.gpkg"
+    write_dataframe(
+        df, test_no_contents_filename, use_arrow=use_arrow, ADD_GPKG_OGR_CONTENTS="NO"
+    )
+    assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename)
+
+    test_no_contents_filename2 = tmp_path / "test_no_contents2.gpkg"
+    write_dataframe(
+        df,
+        test_no_contents_filename2,
+        use_arrow=use_arrow,
+        dataset_options={"add_gpkg_ogr_contents": False},
+    )
+    assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename2)
+
+
1269
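+ # promote_to_multi=None leaves the behaviour to the driver: per the expected
+ # values below, FlatGeobuf promotes to MultiPolygon by default, while GeoJSON
+ # keeps the mixed Polygon / MultiPolygon geometries as-is.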
+ @pytest.mark.parametrize(
+     "ext, promote_to_multi, expected_geometry_types, expected_geometry_type",
+     [
+         (".fgb", None, ["MultiPolygon"], "MultiPolygon"),
+         (".fgb", True, ["MultiPolygon"], "MultiPolygon"),
+         (".fgb", False, ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", None, ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", True, ["MultiPolygon"], "MultiPolygon"),
+         (".geojson", False, ["MultiPolygon", "Polygon"], "Unknown"),
+     ],
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_promote_to_multi(
+     tmp_path,
+     naturalearth_lowres,
+     ext,
+     promote_to_multi,
+     expected_geometry_types,
+     expected_geometry_type,
+     use_arrow,
+ ):
+     input_gdf = read_dataframe(naturalearth_lowres)
+
+     output_path = tmp_path / f"test_promote{ext}"
+     write_dataframe(
+         input_gdf, output_path, use_arrow=use_arrow, promote_to_multi=promote_to_multi
+     )
+
+     assert output_path.exists()
+     output_gdf = read_dataframe(output_path)
+     geometry_types = sorted(output_gdf.geometry.type.unique())
+     assert geometry_types == expected_geometry_types
+     assert read_info(output_path)["geometry_type"] == expected_geometry_type
+
+
+ @pytest.mark.parametrize(
+     "ext, promote_to_multi, geometry_type, "
+     "expected_geometry_types, expected_geometry_type",
+     [
+         (".fgb", None, "Unknown", ["MultiPolygon"], "Unknown"),
+         (".geojson", False, "Unknown", ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", None, "Unknown", ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", None, "Polygon", ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", None, "MultiPolygon", ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", None, "Point", ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", True, "Unknown", ["MultiPolygon"], "MultiPolygon"),
+         (".gpkg", False, "Unknown", ["MultiPolygon", "Polygon"], "Unknown"),
+         (".gpkg", None, "Unknown", ["MultiPolygon"], "Unknown"),
+         (".gpkg", None, "Polygon", ["MultiPolygon"], "Polygon"),
+         (".gpkg", None, "MultiPolygon", ["MultiPolygon"], "MultiPolygon"),
+         (".gpkg", None, "Point", ["MultiPolygon"], "Point"),
+         (".gpkg", True, "Unknown", ["MultiPolygon"], "Unknown"),
+         (".shp", False, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
+         (".shp", None, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
+         (".shp", None, "Polygon", ["MultiPolygon", "Polygon"], "Polygon"),
+         (".shp", None, "MultiPolygon", ["MultiPolygon", "Polygon"], "Polygon"),
+         (".shp", True, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
+     ],
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_promote_to_multi_layer_geom_type(
+     tmp_path,
+     naturalearth_lowres,
+     ext,
+     promote_to_multi,
+     geometry_type,
+     expected_geometry_types,
+     expected_geometry_type,
+     use_arrow,
+ ):
+     input_gdf = read_dataframe(naturalearth_lowres)
+
+     output_path = tmp_path / f"test_promote_layer_geom_type{ext}"
+
+     if ext == ".gpkg" and geometry_type in ("Polygon", "Point"):
+         ctx = pytest.warns(
+             RuntimeWarning, match="A geometry of type MULTIPOLYGON is inserted"
+         )
+     else:
+         ctx = contextlib.nullcontext()
+
+     with ctx:
+         write_dataframe(
+             input_gdf,
+             output_path,
+             use_arrow=use_arrow,
+             promote_to_multi=promote_to_multi,
+             geometry_type=geometry_type,
+         )
+
+     assert output_path.exists()
+     output_gdf = read_dataframe(output_path)
+     geometry_types = sorted(output_gdf.geometry.type.unique())
+     assert geometry_types == expected_geometry_types
+     assert read_info(output_path)["geometry_type"] == expected_geometry_type
+
+
+ @pytest.mark.parametrize(
+     "ext, promote_to_multi, geometry_type, expected_raises_match",
+     [
+         (".fgb", False, "MultiPolygon", "Mismatched geometry type"),
+         (".fgb", False, "Polygon", "Mismatched geometry type"),
+         (".fgb", None, "Point", "Mismatched geometry type"),
+         (".fgb", None, "Polygon", "Mismatched geometry type"),
+         (
+             ".shp",
+             None,
+             "Point",
+             "Could not add feature to layer at index|Error while writing batch to OGR "
+             "layer",
+         ),
+     ],
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
+     tmp_path,
+     naturalearth_lowres,
+     ext,
+     promote_to_multi,
+     geometry_type,
+     expected_raises_match,
+     use_arrow,
+ ):
+     input_gdf = read_dataframe(naturalearth_lowres)
+
+     output_path = tmp_path / f"test{ext}"
+     with pytest.raises((FeatureError, DataLayerError), match=expected_raises_match):
+         write_dataframe(
+             input_gdf,
+             output_path,
+             use_arrow=use_arrow,
+             promote_to_multi=promote_to_multi,
+             geometry_type=geometry_type,
+         )
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_layer_geom_type_invalid(
+     tmp_path, naturalearth_lowres, use_arrow
+ ):
+     df = read_dataframe(naturalearth_lowres)
+
+     filename = tmp_path / "test.geojson"
+     with pytest.raises(
+         GeometryError, match="Geometry type is not supported: NotSupported"
+     ):
+         write_dataframe(df, filename, use_arrow=use_arrow, geometry_type="NotSupported")
+
+
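+ # Shapefile is excluded from the parametrization below because it cannot store
+ # truly mixed geometry types in one layer; that failure mode is covered by the
+ # next test.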
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".shp"])
1419
+ @pytest.mark.requires_arrow_write_api
1420
+ def test_write_dataframe_truly_mixed(tmp_path, ext, use_arrow):
1421
+ geometry = [
1422
+ shapely.Point(0, 0),
1423
+ shapely.LineString([(0, 0), (1, 1)]),
1424
+ shapely.box(0, 0, 1, 1),
1425
+ shapely.MultiPoint([shapely.Point(1, 1), shapely.Point(2, 2)]),
1426
+ shapely.MultiLineString(
1427
+ [shapely.LineString([(1, 1), (2, 2)]), shapely.LineString([(2, 2), (3, 3)])]
1428
+ ),
1429
+ shapely.MultiPolygon([shapely.box(1, 1, 2, 2), shapely.box(2, 2, 3, 3)]),
1430
+ ]
1431
+
1432
+ df = gp.GeoDataFrame(
1433
+ {"col": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]}, geometry=geometry, crs="EPSG:4326"
1434
+ )
1435
+
1436
+ filename = tmp_path / f"test{ext}"
1437
+
1438
+ if ext == ".fgb":
1439
+ # For .fgb, spatial_index=False to avoid the rows being reordered
1440
+ write_dataframe(df, filename, use_arrow=use_arrow, spatial_index=False)
1441
+ else:
1442
+ write_dataframe(df, filename, use_arrow=use_arrow)
1443
+
1444
+ # Drivers that support mixed geometries will default to "Unknown" geometry type
1445
+ assert read_info(filename)["geometry_type"] == "Unknown"
1446
+ result = read_dataframe(filename)
1447
+ assert_geodataframe_equal(result, df, check_geom_type=True)
1448
+
1449
+
1450
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_truly_mixed_invalid(tmp_path, use_arrow):
+     # Shapefile doesn't support generic "Geometry" / "Unknown" type
+     # for mixed geometries
+
+     df = gp.GeoDataFrame(
+         {"col": [1.0, 2.0, 3.0]},
+         geometry=[
+             shapely.Point(0, 0),
+             shapely.LineString([(0, 0), (1, 1)]),
+             shapely.box(0, 0, 1, 1),
+         ],
+         crs="EPSG:4326",
+     )
+
+     # ensure error message from GDAL is included
+     msg = (
+         "Could not add feature to layer at index 1: Attempt to "
+         r"write non-point \(LINESTRING\) geometry to point shapefile."
+         # DataLayerError when using Arrow
+         "|Error while writing batch to OGR layer: Attempt to "
+         r"write non-point \(LINESTRING\) geometry to point shapefile."
+     )
+     with pytest.raises((FeatureError, DataLayerError), match=msg):
+         write_dataframe(df, tmp_path / "test.shp", use_arrow=use_arrow)
+
+
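+ # The layer geometry type has to be inferred from the non-null geometries only;
+ # .fgb is excluded here, consistent with FlatGeobuf not accepting null
+ # geometries (see also test_write_geometry_z_types_auto).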
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".fgb"])
1478
+ @pytest.mark.parametrize(
1479
+ "geoms",
1480
+ [
1481
+ [None, shapely.Point(1, 1)],
1482
+ [shapely.Point(1, 1), None],
1483
+ [None, shapely.Point(1, 1, 2)],
1484
+ [None, None],
1485
+ ],
1486
+ )
1487
+ @pytest.mark.requires_arrow_write_api
1488
+ def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext, use_arrow):
1489
+ filename = tmp_path / f"test{ext}"
1490
+
1491
+ df = gp.GeoDataFrame({"col": [1.0, 2.0]}, geometry=geoms, crs="EPSG:4326")
1492
+ write_dataframe(df, filename, use_arrow=use_arrow)
1493
+ result = read_dataframe(filename)
1494
+ assert_geodataframe_equal(result, df)
1495
+
1496
+
1497
+ @pytest.mark.filterwarnings(
+     "ignore: You will likely lose important projection information"
+ )
+ @pytest.mark.requires_arrow_write_api
+ @requires_pyproj
+ def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
+     df = read_dataframe(naturalearth_lowres_all_ext)
+     # project Belgium to a custom Albers Equal Area projection
+     expected = (
+         df.loc[df.name == "Belgium"]
+         .reset_index(drop=True)
+         .to_crs("+proj=aea +lat_1=49.5 +lat_2=51.5 +lon_0=4.3")
+     )
+     filename = tmp_path / "test.shp"
+     write_dataframe(expected, filename, use_arrow=use_arrow)
+
+     assert filename.exists()
+
+     df = read_dataframe(filename)
+
+     crs = df.crs.to_dict()
+     assert crs["lat_1"] == 49.5
+     assert crs["lat_2"] == 51.5
+     assert crs["lon_0"] == 4.3
+     assert df.crs.equals(expected.crs)
+
+
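+ # Without Arrow, GDAL coerces a column holding mixed Python types to strings on
+ # write (nulls stay null); with Arrow the write raises instead, as covered by
+ # the follow-up test below.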
+ def test_write_read_mixed_column_values(tmp_path):
+     # use_arrow=True is tested separately below
+     mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
+     geoms = [shapely.Point(0, 0) for _ in mixed_values]
+     test_gdf = gp.GeoDataFrame(
+         {"geometry": geoms, "mixed": mixed_values}, crs="epsg:31370"
+     )
+     output_path = tmp_path / "test_write_mixed_column.gpkg"
+     write_dataframe(test_gdf, output_path)
+     output_gdf = read_dataframe(output_path)
+     assert len(test_gdf) == len(output_gdf)
+     for idx, value in enumerate(mixed_values):
+         if value in (None, np.nan):
+             assert output_gdf["mixed"][idx] is None
+         else:
+             assert output_gdf["mixed"][idx] == str(value)
+
+
+ @requires_arrow_write_api
+ def test_write_read_mixed_column_values_arrow(tmp_path):
+     # Arrow cannot represent a column of mixed types
+     mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
+     geoms = [shapely.Point(0, 0) for _ in mixed_values]
+     test_gdf = gp.GeoDataFrame(
+         {"geometry": geoms, "mixed": mixed_values}, crs="epsg:31370"
+     )
+     output_path = tmp_path / "test_write_mixed_column.gpkg"
+     with pytest.raises(TypeError, match=".*Conversion failed for column"):
+         write_dataframe(test_gdf, output_path, use_arrow=True)
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_read_null(tmp_path, use_arrow):
+     output_path = tmp_path / "test_write_nan.gpkg"
+     geom = shapely.Point(0, 0)
+     test_data = {
+         "geometry": [geom, geom, geom],
+         "float64": [1.0, None, np.nan],
+         "object_str": ["test", None, np.nan],
+     }
+     test_gdf = gp.GeoDataFrame(test_data, crs="epsg:31370")
+     write_dataframe(test_gdf, output_path, use_arrow=use_arrow)
+     result_gdf = read_dataframe(output_path)
+     assert len(test_gdf) == len(result_gdf)
+     assert result_gdf["float64"][0] == 1.0
+     assert pd.isna(result_gdf["float64"][1])
+     assert pd.isna(result_gdf["float64"][2])
+     assert result_gdf["object_str"][0] == "test"
+     assert result_gdf["object_str"][1] is None
+     assert result_gdf["object_str"][2] is None
+
+
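+ # /vsimem/ is GDAL's in-memory virtual filesystem; the in-memory file is
+ # removed again with vsi_unlink so the test leaves no state behind.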
+ @pytest.mark.requires_arrow_write_api
+ def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow):
+     path, _ = naturalearth_lowres_vsi
+     mem_path = f"/vsimem/{path.name}"
+
+     input = read_dataframe(path, use_arrow=use_arrow)
+     assert len(input) == 177
+
+     try:
+         write_dataframe(input, mem_path, use_arrow=use_arrow)
+         result = read_dataframe(mem_path, use_arrow=use_arrow)
+         assert len(result) == 177
+     finally:
+         vsi_unlink(mem_path)
+
+
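+ # Both accepted spellings for geometry_type are exercised below: the legacy
+ # GDAL "2.5D ..." names and the ISO "... Z" names.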
+ @pytest.mark.parametrize(
+     "wkt,geom_types",
+     [
+         ("Point Z (0 0 0)", ["2.5D Point", "Point Z"]),
+         ("LineString Z (0 0 0, 1 1 0)", ["2.5D LineString", "LineString Z"]),
+         ("Polygon Z ((0 0 0, 0 1 0, 1 1 0, 0 0 0))", ["2.5D Polygon", "Polygon Z"]),
+         ("MultiPoint Z (0 0 0, 1 1 0)", ["2.5D MultiPoint", "MultiPoint Z"]),
+         (
+             "MultiLineString Z ((0 0 0, 1 1 0), (2 2 2, 3 3 2))",
+             ["2.5D MultiLineString", "MultiLineString Z"],
+         ),
+         (
+             "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))",  # noqa: E501
+             ["2.5D MultiPolygon", "MultiPolygon Z"],
+         ),
+         (
+             "GeometryCollection Z (Point Z (0 0 0))",
+             ["2.5D GeometryCollection", "GeometryCollection Z"],
+         ),
+     ],
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_write_geometry_z_types(tmp_path, wkt, geom_types, use_arrow):
+     filename = tmp_path / "test.fgb"
+     gdf = gp.GeoDataFrame(geometry=from_wkt([wkt]), crs="EPSG:4326")
+     for geom_type in geom_types:
+         write_dataframe(gdf, filename, use_arrow=use_arrow, geometry_type=geom_type)
+         df = read_dataframe(filename)
+         assert_geodataframe_equal(df, gdf)
+
+
+ @pytest.mark.parametrize("ext", ALL_EXTS)
1624
+ @pytest.mark.parametrize(
1625
+ "test_descr, exp_geometry_type, mixed_dimensions, wkt",
1626
+ [
1627
+ ("1 Point Z", "Point Z", False, ["Point Z (0 0 0)"]),
1628
+ ("1 LineString Z", "LineString Z", False, ["LineString Z (0 0 0, 1 1 0)"]),
1629
+ (
1630
+ "1 Polygon Z",
1631
+ "Polygon Z",
1632
+ False,
1633
+ ["Polygon Z ((0 0 0, 0 1 0, 1 1 0, 0 0 0))"],
1634
+ ),
1635
+ ("1 MultiPoint Z", "MultiPoint Z", False, ["MultiPoint Z (0 0 0, 1 1 0)"]),
1636
+ (
1637
+ "1 MultiLineString Z",
1638
+ "MultiLineString Z",
1639
+ False,
1640
+ ["MultiLineString Z ((0 0 0, 1 1 0), (2 2 2, 3 3 2))"],
1641
+ ),
1642
+ (
1643
+ "1 MultiLinePolygon Z",
1644
+ "MultiPolygon Z",
1645
+ False,
1646
+ [
1647
+ "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))" # noqa: E501
1648
+ ],
1649
+ ),
1650
+ (
1651
+ "1 GeometryCollection Z",
1652
+ "GeometryCollection Z",
1653
+ False,
1654
+ ["GeometryCollection Z (Point Z (0 0 0))"],
1655
+ ),
1656
+ ("Point Z + Point", "Point Z", True, ["Point Z (0 0 0)", "Point (0 0)"]),
1657
+ ("Point Z + None", "Point Z", False, ["Point Z (0 0 0)", None]),
1658
+ (
1659
+ "Point Z + LineString Z",
1660
+ "Unknown",
1661
+ False,
1662
+ ["LineString Z (0 0 0, 1 1 0)", "Point Z (0 0 0)"],
1663
+ ),
1664
+ (
1665
+ "Point Z + LineString",
1666
+ "Unknown",
1667
+ True,
1668
+ ["LineString (0 0, 1 1)", "Point Z (0 0 0)"],
1669
+ ),
1670
+ ],
1671
+ )
1672
+ @pytest.mark.requires_arrow_write_api
1673
+ def test_write_geometry_z_types_auto(
1674
+ tmp_path, ext, test_descr, exp_geometry_type, mixed_dimensions, wkt, use_arrow
1675
+ ):
1676
+ # Shapefile has some different behaviour that other file types
1677
+ if ext == ".shp":
1678
+ if exp_geometry_type in ("GeometryCollection Z", "Unknown"):
1679
+ pytest.skip(f"ext {ext} doesn't support {exp_geometry_type}")
1680
+ elif exp_geometry_type == "MultiLineString Z":
1681
+ exp_geometry_type = "LineString Z"
1682
+ elif exp_geometry_type == "MultiPolygon Z":
1683
+ exp_geometry_type = "Polygon Z"
1684
+
1685
+ column_data = {}
1686
+ column_data["test_descr"] = [test_descr] * len(wkt)
1687
+ column_data["idx"] = [str(idx) for idx in range(len(wkt))]
1688
+ gdf = gp.GeoDataFrame(column_data, geometry=from_wkt(wkt), crs="EPSG:4326")
1689
+ filename = tmp_path / f"test{ext}"
1690
+
1691
+ if ext == ".fgb":
1692
+ # writing empty / null geometries not allowed by FlatGeobuf for
1693
+ # GDAL >= 3.6.4 and were simply not written previously
1694
+ gdf = gdf.loc[~(gdf.geometry.isna() | gdf.geometry.is_empty)]
1695
+
1696
+ if mixed_dimensions and DRIVERS[ext] in DRIVERS_NO_MIXED_DIMENSIONS:
1697
+ with pytest.raises(
1698
+ DataSourceError,
1699
+ match=("Mixed 2D and 3D coordinates are not supported by"),
1700
+ ):
1701
+ write_dataframe(gdf, filename, use_arrow=use_arrow)
1702
+ return
1703
+ else:
1704
+ write_dataframe(gdf, filename, use_arrow=use_arrow)
1705
+
1706
+ info = read_info(filename)
1707
+ assert info["geometry_type"] == exp_geometry_type
1708
+
1709
+ result_gdf = read_dataframe(filename)
1710
+ if ext == ".geojsonl":
1711
+ result_gdf.crs = "EPSG:4326"
1712
+
1713
+ assert_geodataframe_equal(gdf, result_gdf)
1714
+
1715
+
1716
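+ # on_invalid controls how unparseable geometries are handled on read: "raise"
+ # propagates the GEOS exception, "warn" returns them as None with a warning,
+ # and "ignore" returns them as None silently.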
+ @pytest.mark.parametrize(
+     "on_invalid, message",
+     [
+         (
+             "warn",
+             "Invalid WKB: geometry is returned as None. IllegalArgumentException: "
+             "Invalid number of points in LinearRing found 2 - must be 0 or >=",
+         ),
+         ("raise", "Invalid number of points in LinearRing found 2 - must be 0 or >="),
+         ("ignore", None),
+     ],
+ )
+ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
+     if on_invalid == "raise":
+         handler = pytest.raises(shapely.errors.GEOSException, match=message)
+     elif on_invalid == "warn":
+         handler = pytest.warns(match=message)
+     elif on_invalid == "ignore":
+         handler = contextlib.nullcontext()
+     else:
+         raise ValueError(f"unknown value for on_invalid: {on_invalid}")
+
+     # create a GeoJSON file with an invalid exterior ring
+     invalid_geojson = """{
+         "type": "FeatureCollection",
+         "features": [
+             {
+                 "type": "Feature",
+                 "properties": {},
+                 "geometry": {
+                     "type": "Polygon",
+                     "coordinates": [ [ [0, 0], [0, 0] ] ]
+                 }
+             }
+         ]
+     }"""
+
+     filename = tmp_path / "test.geojson"
+     with open(filename, "w") as f:
+         _ = f.write(invalid_geojson)
+
+     with handler:
+         df = read_dataframe(
+             filename,
+             use_arrow=use_arrow,
+             on_invalid=on_invalid,
+         )
+         assert df.geometry.isnull().all()
+
+
+ def test_read_multisurface(multisurface_file, use_arrow):
+     if use_arrow:
+         # TODO: revisit once https://github.com/geopandas/pyogrio/issues/478
+         # is resolved.
+         pytest.skip("Shapely + GEOS 3.13 crashes in from_wkb for this case")
+
+         with pytest.raises(shapely.errors.GEOSException):
+             # TODO(Arrow)
+             # shapely fails parsing the WKB
+             read_dataframe(multisurface_file, use_arrow=True)
+     else:
+         df = read_dataframe(multisurface_file)
+
+         # MultiSurface should be converted to MultiPolygon
+         assert df.geometry.type.tolist() == ["MultiPolygon"]
+
+
+ def test_read_dataset_kwargs(nested_geojson_file, use_arrow):
+     # by default, nested data are not flattened
+     df = read_dataframe(nested_geojson_file, use_arrow=use_arrow)
+
+     expected = gp.GeoDataFrame(
+         {
+             "top_level": ["A"],
+             "intermediate_level": ['{ "bottom_level": "B" }'],
+         },
+         geometry=[shapely.Point(0, 0)],
+         crs="EPSG:4326",
+     )
+
+     assert_geodataframe_equal(df, expected)
+
+     df = read_dataframe(
+         nested_geojson_file, use_arrow=use_arrow, FLATTEN_NESTED_ATTRIBUTES="YES"
+     )
+
+     expected = gp.GeoDataFrame(
+         {
+             "top_level": ["A"],
+             "intermediate_level_bottom_level": ["B"],
+         },
+         geometry=[shapely.Point(0, 0)],
+         crs="EPSG:4326",
+     )
+
+     assert_geodataframe_equal(df, expected)
+
+
+ def test_read_invalid_dataset_kwargs(naturalearth_lowres, use_arrow):
+     with pytest.warns(RuntimeWarning, match="does not support open option INVALID"):
+         read_dataframe(naturalearth_lowres, use_arrow=use_arrow, INVALID="YES")
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_nullable_dtypes(tmp_path, use_arrow):
+     path = tmp_path / "test_nullable_dtypes.gpkg"
+     test_data = {
+         "col1": pd.Series([1, 2, 3], dtype="int64"),
+         "col2": pd.Series([1, 2, None], dtype="Int64"),
+         "col3": pd.Series([0.1, None, 0.3], dtype="Float32"),
+         "col4": pd.Series([True, False, None], dtype="boolean"),
+         "col5": pd.Series(["a", None, "b"], dtype="string"),
+     }
+     input_gdf = gp.GeoDataFrame(
+         test_data, geometry=[shapely.Point(0, 0)] * 3, crs="epsg:31370"
+     )
+     write_dataframe(input_gdf, path, use_arrow=use_arrow)
+     output_gdf = read_dataframe(path)
+     # We read it back as default (non-nullable) numpy dtypes, so we cast
+     # to those for the expected result
+     expected = input_gdf.copy()
+     expected["col2"] = expected["col2"].astype("float64")
+     expected["col3"] = expected["col3"].astype("float32")
+     expected["col4"] = expected["col4"].astype("float64")
+     expected["col5"] = expected["col5"].astype(object)
+     expected.loc[1, "col5"] = None  # pandas converts to pd.NA on line above
+     assert_geodataframe_equal(output_gdf, expected)
+
+
+ @pytest.mark.parametrize(
+     "metadata_type", ["dataset_metadata", "layer_metadata", "metadata"]
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_metadata_io(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
+     metadata = {"level": metadata_type}
+
+     df = read_dataframe(naturalearth_lowres)
+
+     filename = tmp_path / "test.gpkg"
+     write_dataframe(df, filename, use_arrow=use_arrow, **{metadata_type: metadata})
+
+     metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type
+
+     assert read_info(filename)[metadata_key] == metadata
+
+
+ @pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
1863
+ @pytest.mark.parametrize(
1864
+ "metadata",
1865
+ [
1866
+ {1: 2},
1867
+ {"key": None},
1868
+ {"key": 1},
1869
+ ],
1870
+ )
1871
+ @pytest.mark.requires_arrow_write_api
1872
+ def test_invalid_metadata(
1873
+ tmp_path, naturalearth_lowres, metadata_type, metadata, use_arrow
1874
+ ):
1875
+ df = read_dataframe(naturalearth_lowres)
1876
+ with pytest.raises(ValueError, match="must be a string"):
1877
+ write_dataframe(
1878
+ df, tmp_path / "test.gpkg", use_arrow=use_arrow, **{metadata_type: metadata}
1879
+ )
1880
+
1881
+
1882
+ @pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
1883
+ @pytest.mark.requires_arrow_write_api
1884
+ def test_metadata_unsupported(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
1885
+ """metadata is silently ignored"""
1886
+
1887
+ filename = tmp_path / "test.geojson"
1888
+ write_dataframe(
1889
+ read_dataframe(naturalearth_lowres),
1890
+ filename,
1891
+ use_arrow=use_arrow,
1892
+ **{metadata_type: {"key": "value"}},
1893
+ )
1894
+
1895
+ metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type
1896
+
1897
+ assert read_info(filename)[metadata_key] is None
1898
+
1899
+
1900
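+ # arrow_to_pandas_kwargs is forwarded to pyarrow's to_pandas(); passing a
+ # types_mapper makes the result use pandas ArrowDtype-backed columns.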
+ @pytest.mark.skipif(not PANDAS_GE_15, reason="ArrowDtype requires pandas 1.5+")
+ def test_read_dataframe_arrow_dtypes(tmp_path):
+     # https://github.com/geopandas/pyogrio/issues/319 - ensure arrow binary
+     # column can be converted with from_wkb in case of missing values
+     pytest.importorskip("pyarrow")
+     filename = tmp_path / "test.gpkg"
+     df = gp.GeoDataFrame(
+         {"col": [1.0, 2.0]}, geometry=[Point(1, 1), None], crs="EPSG:4326"
+     )
+     write_dataframe(df, filename)
+
+     result = read_dataframe(
+         filename,
+         use_arrow=True,
+         arrow_to_pandas_kwargs={
+             "types_mapper": lambda pa_dtype: pd.ArrowDtype(pa_dtype)
+         },
+     )
+     assert isinstance(result["col"].dtype, pd.ArrowDtype)
+     result["col"] = result["col"].astype("float64")
+     assert_geodataframe_equal(result, df)
+
+
+ @requires_pyarrow_api
+ @pytest.mark.skipif(
+     __gdal_version__ < (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
+ )
+ @pytest.mark.parametrize("ext", ALL_EXTS)
+ def test_arrow_bool_roundtrip(tmp_path, ext):
+     filename = tmp_path / f"test{ext}"
+
+     kwargs = {}
+
+     if ext == ".fgb":
+         # For .fgb, use spatial_index=False to avoid the rows being reordered
+         kwargs["spatial_index"] = False
+
+     df = gp.GeoDataFrame(
+         {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
+         crs="EPSG:4326",
+     )
+
+     write_dataframe(df, filename, **kwargs)
+     result = read_dataframe(filename, use_arrow=True)
+     # Shapefiles do not support bool columns; these are returned as int32
+     assert_geodataframe_equal(result, df, check_dtype=ext != ".shp")
+
+
+ @requires_pyarrow_api
+ @pytest.mark.skipif(
+     __gdal_version__ >= (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
+ )
+ @pytest.mark.parametrize("ext", ALL_EXTS)
+ def test_arrow_bool_exception(tmp_path, ext):
+     filename = tmp_path / f"test{ext}"
+
+     df = gp.GeoDataFrame(
+         {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
+         crs="EPSG:4326",
+     )
+
+     write_dataframe(df, filename)
+
+     if ext in {".fgb", ".gpkg"}:
+         # only raise exception for GPKG / FGB
+         with pytest.raises(
+             RuntimeError,
+             match="GDAL < 3.8.3 does not correctly read boolean data values using "
+             "the Arrow API",
+         ):
+             read_dataframe(filename, use_arrow=True)
+
+         # do not raise exception if no bool columns are read
+         read_dataframe(filename, use_arrow=True, columns=[])
+
+     else:
+         _ = read_dataframe(filename, use_arrow=True)
+
+
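+ # The tests below cover writing to in-memory targets: write_dataframe accepts a
+ # file-like object such as BytesIO, spooling the output through a temporary
+ # /vsimem/ file that must be cleaned up afterwards.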
+ @pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
1980
+ @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
1981
+ def test_write_memory(naturalearth_lowres, driver):
1982
+ df = read_dataframe(naturalearth_lowres)
1983
+
1984
+ buffer = BytesIO()
1985
+ write_dataframe(df, buffer, driver=driver, layer="test")
1986
+
1987
+ assert len(buffer.getbuffer()) > 0
1988
+
1989
+ actual = read_dataframe(buffer)
1990
+ assert len(actual) == len(df)
1991
+
1992
+ is_json = driver == "GeoJSON"
1993
+
1994
+ assert_geodataframe_equal(
1995
+ actual,
1996
+ df,
1997
+ check_less_precise=is_json,
1998
+ check_index_type=False,
1999
+ check_dtype=not is_json,
2000
+ )
2001
+
2002
+ # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
2003
+ assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
2004
+
2005
+
2006
+ def test_write_memory_driver_required(naturalearth_lowres):
+     df = read_dataframe(naturalearth_lowres)
+
+     buffer = BytesIO()
+
+     with pytest.raises(
+         ValueError,
+         match="driver must be provided to write to in-memory file",
+     ):
+         write_dataframe(df.head(1), buffer, driver=None, layer="test")
+
+     # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+     assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+
+
+ @pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
2022
+ def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
2023
+ if driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
2024
+ pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")
2025
+
2026
+ df = read_dataframe(naturalearth_lowres)
2027
+
2028
+ buffer = BytesIO()
2029
+
2030
+ with pytest.raises(
2031
+ ValueError, match=f"writing to in-memory file is not supported for {driver}"
2032
+ ):
2033
+ write_dataframe(df, buffer, driver=driver, layer="test")
2034
+
2035
+ # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
2036
+ assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
2037
+
2038
+
2039
+ @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
2040
+ def test_write_memory_append_unsupported(naturalearth_lowres, driver):
2041
+ df = read_dataframe(naturalearth_lowres)
2042
+
2043
+ buffer = BytesIO()
2044
+
2045
+ with pytest.raises(
2046
+ NotImplementedError, match="append is not supported for in-memory files"
2047
+ ):
2048
+ write_dataframe(df.head(1), buffer, driver=driver, layer="test", append=True)
2049
+
2050
+ # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
2051
+ assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
2052
+
2053
+
2054
+ def test_write_memory_existing_unsupported(naturalearth_lowres):
+     df = read_dataframe(naturalearth_lowres)
+
+     buffer = BytesIO(b"0000")
+     with pytest.raises(
+         NotImplementedError,
+         match="writing to existing in-memory object is not supported",
+     ):
+         write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test")
+
+     # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+     assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+
+
+ def test_write_open_file_handle(tmp_path, naturalearth_lowres):
+     """Verify that writing to an open file handle is not currently supported"""
+
+     df = read_dataframe(naturalearth_lowres)
+
+     # verify it fails for a regular file handle
+     with pytest.raises(
+         NotImplementedError, match="writing to an open file handle is not yet supported"
+     ):
+         with open(tmp_path / "test.geojson", "wb") as f:
+             write_dataframe(df.head(1), f)
+
+     # verify it fails for a ZipFile
+     with pytest.raises(
+         NotImplementedError, match="writing to an open file handle is not yet supported"
+     ):
+         with ZipFile(tmp_path / "test.geojson.zip", "w") as z:
+             with z.open("test.geojson", "w") as f:
+                 write_dataframe(df.head(1), f)
+
+     # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+     assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+
+
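+ # The encoding tests below use the encoded_text fixture, which supplies a
+ # non-UTF-8 codec name together with sample text in that encoding.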
+ @pytest.mark.parametrize("ext", ["gpkg", "geojson"])
2093
+ def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
2094
+ """Verify that we write non-UTF data to the data source
2095
+
2096
+ IMPORTANT: this may not be valid for the data source and will likely render
2097
+ them unusable in other tools, but should successfully roundtrip unless we
2098
+ disable writing using other encodings.
2099
+
2100
+ NOTE: FlatGeobuff driver cannot handle non-UTF data in GDAL >= 3.9
2101
+
2102
+ NOTE: pyarrow cannot handle non-UTF-8 characters in this way
2103
+ """
2104
+
2105
+ encoding, text = encoded_text
2106
+ output_path = tmp_path / f"test.{ext}"
2107
+
2108
+ df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
2109
+ write_dataframe(df, output_path, encoding=encoding)
2110
+
2111
+ # cannot open these files without specifying encoding
2112
+ with pytest.raises(UnicodeDecodeError):
2113
+ read_dataframe(output_path)
2114
+
2115
+ # must provide encoding to read these properly
2116
+ actual = read_dataframe(output_path, encoding=encoding)
2117
+ assert actual.columns[0] == text
2118
+ assert actual[text].values[0] == text
2119
+
2120
+
2121
+ @requires_pyarrow_api
+ @pytest.mark.parametrize("ext", ["gpkg", "geojson"])
+ def test_non_utf8_encoding_io_arrow_exception(tmp_path, ext, encoded_text):
+     encoding, text = encoded_text
+     output_path = tmp_path / f"test.{ext}"
+
+     df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
+     write_dataframe(df, output_path, encoding=encoding)
+
+     # cannot open these files without specifying encoding
+     with pytest.raises(UnicodeDecodeError):
+         read_dataframe(output_path)
+
+     with pytest.raises(
+         ValueError, match="non-UTF-8 encoding is not supported for Arrow"
+     ):
+         read_dataframe(output_path, encoding=encoding, use_arrow=True)
+
+
+ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):
+     encoding, text = encoded_text
+
+     output_path = tmp_path / "test.shp"
+
+     df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
+     write_dataframe(df, output_path, encoding=encoding)
+
+     # NOTE: GDAL automatically creates a cpg file with the encoding name, which
+     # means that if we read this without specifying the encoding it uses the
+     # correct one
+     actual = read_dataframe(output_path, use_arrow=use_arrow)
+     assert actual.columns[0] == text
+     assert actual[text].values[0] == text
+
+     # verify that if the cpg file is not present, the user-provided encoding
+     # must be used
+     output_path.with_suffix(".cpg").unlink()
+
+     # We will assume ISO-8859-1, which is wrong
+     miscoded = text.encode(encoding).decode("ISO-8859-1")
+
+     if use_arrow:
+         # pyarrow cannot decode column name with incorrect encoding
+         with pytest.raises(UnicodeDecodeError):
+             read_dataframe(output_path, use_arrow=True)
+     else:
+         bad = read_dataframe(output_path, use_arrow=False)
+         assert bad.columns[0] == miscoded
+         assert bad[miscoded].values[0] == miscoded
+
+     # If encoding is provided, that should yield correct text
+     actual = read_dataframe(output_path, encoding=encoding, use_arrow=use_arrow)
+     assert actual.columns[0] == text
+     assert actual[text].values[0] == text
+
+     # If the ENCODING open option is provided, that should also yield correct text
+     actual = read_dataframe(output_path, use_arrow=use_arrow, ENCODING=encoding)
+     assert actual.columns[0] == text
+     assert actual[text].values[0] == text
+
+
+ def test_encoding_read_option_collision_shapefile(naturalearth_lowres, use_arrow):
+     """Providing both encoding parameter and ENCODING open option
+     (even if blank) is not allowed."""
+
+     with pytest.raises(
+         ValueError, match='cannot provide both encoding parameter and "ENCODING" option'
+     ):
+         read_dataframe(
+             naturalearth_lowres, encoding="CP936", ENCODING="", use_arrow=use_arrow
+         )
+
+
+ def test_encoding_write_layer_option_collision_shapefile(tmp_path, encoded_text):
+     """Providing both encoding parameter and ENCODING layer creation option
+     (even if blank) is not allowed."""
+     encoding, text = encoded_text
+
+     output_path = tmp_path / "test.shp"
+     df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
+
+     with pytest.raises(
+         ValueError,
+         match=(
+             'cannot provide both encoding parameter and "ENCODING" layer creation '
+             "option"
+         ),
+     ):
+         write_dataframe(
+             df, output_path, encoding=encoding, layer_options={"ENCODING": ""}
+         )
+
+
+ def test_non_utf8_encoding_shapefile_sql(tmp_path, use_arrow):
+     encoding = "CP936"
+
+     output_path = tmp_path / "test.shp"
+
+     mandarin = "中文"
+     df = gp.GeoDataFrame(
+         {mandarin: mandarin, "geometry": [Point(0, 0)]}, crs="EPSG:4326"
+     )
+     write_dataframe(df, output_path, encoding=encoding)
+
+     actual = read_dataframe(
+         output_path,
+         sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
+         use_arrow=use_arrow,
+     )
+     assert actual.columns[0] == mandarin
+     assert actual[mandarin].values[0] == mandarin
+
+     actual = read_dataframe(
+         output_path,
+         sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
+         encoding=encoding,
+         use_arrow=use_arrow,
+     )
+     assert actual.columns[0] == mandarin
+     assert actual[mandarin].values[0] == mandarin
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
+     # confirm KML coordinates are written in lon, lat order even if the CRS
+     # axis order specifies otherwise
+     points = [Point(10, 20), Point(30, 40), Point(50, 60)]
+     gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
+     output_path = tmp_path / "test.kml"
+     write_dataframe(
+         gdf, output_path, layer="tmp_layer", driver="KML", use_arrow=use_arrow
+     )
+
+     gdf_in = read_dataframe(output_path, use_arrow=use_arrow)
+
+     assert np.array_equal(gdf_in.geometry.values, points)
+
+     if "LIBKML" in list_drivers():
+         # test appending to the existing file only if LIBKML is available,
+         # as GDAL appears to fall back on the LIBKML driver when appending.
+         points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
+         gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
+
+         write_dataframe(
+             gdf_append,
+             output_path,
+             layer="tmp_layer",
+             driver="KML",
+             use_arrow=use_arrow,
+             append=True,
+         )
+         # force_2d is used to compare only the xy geometry, as a z-dimension is
+         # undesirably introduced when the kml file is over-written.
+         gdf_in_appended = read_dataframe(
+             output_path, use_arrow=use_arrow, force_2d=True
+         )
+
+         assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_geojson_rfc7946_coordinates(tmp_path, use_arrow):
+     points = [Point(10, 20), Point(30, 40), Point(50, 60)]
+     gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
+     output_path = tmp_path / "test.geojson"
+     write_dataframe(
+         gdf,
+         output_path,
+         layer="tmp_layer",
+         driver="GeoJSON",
+         RFC7946=True,
+         use_arrow=use_arrow,
+     )
+
+     gdf_in = read_dataframe(output_path, use_arrow=use_arrow)
+
+     assert np.array_equal(gdf_in.geometry.values, points)
+
+     # test appending to the existing file
+
+     points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
+     gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
+
+     write_dataframe(
+         gdf_append,
+         output_path,
+         layer="tmp_layer",
+         driver="GeoJSON",
+         RFC7946=True,
+         use_arrow=use_arrow,
+         append=True,
+     )
+
+     gdf_in_appended = read_dataframe(output_path, use_arrow=use_arrow)
+     assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)