pyogrio-0.9.0-cp310-cp310-macosx_12_0_x86_64.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

This version of pyogrio has been flagged as potentially problematic.

Files changed (239)
  1. pyogrio/.dylibs/libgdal.34.3.8.5.dylib +0 -0
  2. pyogrio/__init__.py +48 -0
  3. pyogrio/_compat.py +41 -0
  4. pyogrio/_env.py +61 -0
  5. pyogrio/_err.cpython-310-darwin.so +0 -0
  6. pyogrio/_err.pxd +4 -0
  7. pyogrio/_err.pyx +250 -0
  8. pyogrio/_geometry.cpython-310-darwin.so +0 -0
  9. pyogrio/_geometry.pxd +4 -0
  10. pyogrio/_geometry.pyx +129 -0
  11. pyogrio/_io.cpython-310-darwin.so +0 -0
  12. pyogrio/_io.pxd +0 -0
  13. pyogrio/_io.pyx +2742 -0
  14. pyogrio/_ogr.cpython-310-darwin.so +0 -0
  15. pyogrio/_ogr.pxd +444 -0
  16. pyogrio/_ogr.pyx +346 -0
  17. pyogrio/_version.py +21 -0
  18. pyogrio/_vsi.cpython-310-darwin.so +0 -0
  19. pyogrio/_vsi.pxd +4 -0
  20. pyogrio/_vsi.pyx +140 -0
  21. pyogrio/arrow_bridge.h +115 -0
  22. pyogrio/core.py +320 -0
  23. pyogrio/errors.py +32 -0
  24. pyogrio/gdal_data/GDAL-targets-release.cmake +19 -0
  25. pyogrio/gdal_data/GDAL-targets.cmake +105 -0
  26. pyogrio/gdal_data/GDALConfig.cmake +25 -0
  27. pyogrio/gdal_data/GDALConfigVersion.cmake +85 -0
  28. pyogrio/gdal_data/GDALLogoBW.svg +138 -0
  29. pyogrio/gdal_data/GDALLogoColor.svg +126 -0
  30. pyogrio/gdal_data/GDALLogoGS.svg +126 -0
  31. pyogrio/gdal_data/LICENSE.TXT +467 -0
  32. pyogrio/gdal_data/bag_template.xml +201 -0
  33. pyogrio/gdal_data/copyright +467 -0
  34. pyogrio/gdal_data/cubewerx_extra.wkt +48 -0
  35. pyogrio/gdal_data/default.rsc +0 -0
  36. pyogrio/gdal_data/ecw_cs.wkt +1453 -0
  37. pyogrio/gdal_data/eedaconf.json +23 -0
  38. pyogrio/gdal_data/epsg.wkt +1 -0
  39. pyogrio/gdal_data/esri_StatePlane_extra.wkt +631 -0
  40. pyogrio/gdal_data/gdalicon.png +0 -0
  41. pyogrio/gdal_data/gdalinfo_output.schema.json +346 -0
  42. pyogrio/gdal_data/gdalmdiminfo_output.schema.json +321 -0
  43. pyogrio/gdal_data/gdalvrt.xsd +772 -0
  44. pyogrio/gdal_data/gfs.xsd +246 -0
  45. pyogrio/gdal_data/gml_registry.xml +117 -0
  46. pyogrio/gdal_data/gml_registry.xsd +66 -0
  47. pyogrio/gdal_data/gmlasconf.xml +169 -0
  48. pyogrio/gdal_data/gmlasconf.xsd +1066 -0
  49. pyogrio/gdal_data/grib2_center.csv +251 -0
  50. pyogrio/gdal_data/grib2_process.csv +102 -0
  51. pyogrio/gdal_data/grib2_subcenter.csv +63 -0
  52. pyogrio/gdal_data/grib2_table_4_2_0_0.csv +261 -0
  53. pyogrio/gdal_data/grib2_table_4_2_0_1.csv +261 -0
  54. pyogrio/gdal_data/grib2_table_4_2_0_13.csv +261 -0
  55. pyogrio/gdal_data/grib2_table_4_2_0_14.csv +261 -0
  56. pyogrio/gdal_data/grib2_table_4_2_0_15.csv +261 -0
  57. pyogrio/gdal_data/grib2_table_4_2_0_16.csv +261 -0
  58. pyogrio/gdal_data/grib2_table_4_2_0_17.csv +11 -0
  59. pyogrio/gdal_data/grib2_table_4_2_0_18.csv +261 -0
  60. pyogrio/gdal_data/grib2_table_4_2_0_19.csv +261 -0
  61. pyogrio/gdal_data/grib2_table_4_2_0_190.csv +261 -0
  62. pyogrio/gdal_data/grib2_table_4_2_0_191.csv +261 -0
  63. pyogrio/gdal_data/grib2_table_4_2_0_2.csv +261 -0
  64. pyogrio/gdal_data/grib2_table_4_2_0_20.csv +261 -0
  65. pyogrio/gdal_data/grib2_table_4_2_0_21.csv +261 -0
  66. pyogrio/gdal_data/grib2_table_4_2_0_3.csv +261 -0
  67. pyogrio/gdal_data/grib2_table_4_2_0_4.csv +261 -0
  68. pyogrio/gdal_data/grib2_table_4_2_0_5.csv +261 -0
  69. pyogrio/gdal_data/grib2_table_4_2_0_6.csv +261 -0
  70. pyogrio/gdal_data/grib2_table_4_2_0_7.csv +261 -0
  71. pyogrio/gdal_data/grib2_table_4_2_10_0.csv +261 -0
  72. pyogrio/gdal_data/grib2_table_4_2_10_1.csv +261 -0
  73. pyogrio/gdal_data/grib2_table_4_2_10_191.csv +261 -0
  74. pyogrio/gdal_data/grib2_table_4_2_10_2.csv +261 -0
  75. pyogrio/gdal_data/grib2_table_4_2_10_3.csv +261 -0
  76. pyogrio/gdal_data/grib2_table_4_2_10_4.csv +261 -0
  77. pyogrio/gdal_data/grib2_table_4_2_1_0.csv +261 -0
  78. pyogrio/gdal_data/grib2_table_4_2_1_1.csv +261 -0
  79. pyogrio/gdal_data/grib2_table_4_2_1_2.csv +261 -0
  80. pyogrio/gdal_data/grib2_table_4_2_20_0.csv +261 -0
  81. pyogrio/gdal_data/grib2_table_4_2_20_1.csv +261 -0
  82. pyogrio/gdal_data/grib2_table_4_2_20_2.csv +261 -0
  83. pyogrio/gdal_data/grib2_table_4_2_2_0.csv +261 -0
  84. pyogrio/gdal_data/grib2_table_4_2_2_3.csv +261 -0
  85. pyogrio/gdal_data/grib2_table_4_2_2_4.csv +261 -0
  86. pyogrio/gdal_data/grib2_table_4_2_2_5.csv +261 -0
  87. pyogrio/gdal_data/grib2_table_4_2_2_6.csv +261 -0
  88. pyogrio/gdal_data/grib2_table_4_2_3_0.csv +261 -0
  89. pyogrio/gdal_data/grib2_table_4_2_3_1.csv +261 -0
  90. pyogrio/gdal_data/grib2_table_4_2_3_2.csv +28 -0
  91. pyogrio/gdal_data/grib2_table_4_2_3_3.csv +8 -0
  92. pyogrio/gdal_data/grib2_table_4_2_3_4.csv +14 -0
  93. pyogrio/gdal_data/grib2_table_4_2_3_5.csv +11 -0
  94. pyogrio/gdal_data/grib2_table_4_2_3_6.csv +11 -0
  95. pyogrio/gdal_data/grib2_table_4_2_4_0.csv +261 -0
  96. pyogrio/gdal_data/grib2_table_4_2_4_1.csv +261 -0
  97. pyogrio/gdal_data/grib2_table_4_2_4_10.csv +261 -0
  98. pyogrio/gdal_data/grib2_table_4_2_4_2.csv +261 -0
  99. pyogrio/gdal_data/grib2_table_4_2_4_3.csv +261 -0
  100. pyogrio/gdal_data/grib2_table_4_2_4_4.csv +261 -0
  101. pyogrio/gdal_data/grib2_table_4_2_4_5.csv +261 -0
  102. pyogrio/gdal_data/grib2_table_4_2_4_6.csv +261 -0
  103. pyogrio/gdal_data/grib2_table_4_2_4_7.csv +261 -0
  104. pyogrio/gdal_data/grib2_table_4_2_4_8.csv +261 -0
  105. pyogrio/gdal_data/grib2_table_4_2_4_9.csv +261 -0
  106. pyogrio/gdal_data/grib2_table_4_2_local_Canada.csv +5 -0
  107. pyogrio/gdal_data/grib2_table_4_2_local_HPC.csv +2 -0
  108. pyogrio/gdal_data/grib2_table_4_2_local_MRMS.csv +175 -0
  109. pyogrio/gdal_data/grib2_table_4_2_local_NCEP.csv +401 -0
  110. pyogrio/gdal_data/grib2_table_4_2_local_NDFD.csv +38 -0
  111. pyogrio/gdal_data/grib2_table_4_2_local_index.csv +7 -0
  112. pyogrio/gdal_data/grib2_table_4_5.csv +261 -0
  113. pyogrio/gdal_data/grib2_table_versions.csv +3 -0
  114. pyogrio/gdal_data/gt_datum.csv +229 -0
  115. pyogrio/gdal_data/gt_ellips.csv +24 -0
  116. pyogrio/gdal_data/header.dxf +1124 -0
  117. pyogrio/gdal_data/inspire_cp_BasicPropertyUnit.gfs +57 -0
  118. pyogrio/gdal_data/inspire_cp_CadastralBoundary.gfs +60 -0
  119. pyogrio/gdal_data/inspire_cp_CadastralParcel.gfs +81 -0
  120. pyogrio/gdal_data/inspire_cp_CadastralZoning.gfs +161 -0
  121. pyogrio/gdal_data/jpfgdgml_AdmArea.gfs +59 -0
  122. pyogrio/gdal_data/jpfgdgml_AdmBdry.gfs +49 -0
  123. pyogrio/gdal_data/jpfgdgml_AdmPt.gfs +59 -0
  124. pyogrio/gdal_data/jpfgdgml_BldA.gfs +54 -0
  125. pyogrio/gdal_data/jpfgdgml_BldL.gfs +54 -0
  126. pyogrio/gdal_data/jpfgdgml_Cntr.gfs +54 -0
  127. pyogrio/gdal_data/jpfgdgml_CommBdry.gfs +49 -0
  128. pyogrio/gdal_data/jpfgdgml_CommPt.gfs +59 -0
  129. pyogrio/gdal_data/jpfgdgml_Cstline.gfs +54 -0
  130. pyogrio/gdal_data/jpfgdgml_ElevPt.gfs +54 -0
  131. pyogrio/gdal_data/jpfgdgml_GCP.gfs +94 -0
  132. pyogrio/gdal_data/jpfgdgml_LeveeEdge.gfs +49 -0
  133. pyogrio/gdal_data/jpfgdgml_RailCL.gfs +54 -0
  134. pyogrio/gdal_data/jpfgdgml_RdASL.gfs +44 -0
  135. pyogrio/gdal_data/jpfgdgml_RdArea.gfs +54 -0
  136. pyogrio/gdal_data/jpfgdgml_RdCompt.gfs +59 -0
  137. pyogrio/gdal_data/jpfgdgml_RdEdg.gfs +59 -0
  138. pyogrio/gdal_data/jpfgdgml_RdMgtBdry.gfs +49 -0
  139. pyogrio/gdal_data/jpfgdgml_RdSgmtA.gfs +59 -0
  140. pyogrio/gdal_data/jpfgdgml_RvrMgtBdry.gfs +49 -0
  141. pyogrio/gdal_data/jpfgdgml_SBAPt.gfs +49 -0
  142. pyogrio/gdal_data/jpfgdgml_SBArea.gfs +54 -0
  143. pyogrio/gdal_data/jpfgdgml_SBBdry.gfs +44 -0
  144. pyogrio/gdal_data/jpfgdgml_WA.gfs +54 -0
  145. pyogrio/gdal_data/jpfgdgml_WL.gfs +54 -0
  146. pyogrio/gdal_data/jpfgdgml_WStrA.gfs +54 -0
  147. pyogrio/gdal_data/jpfgdgml_WStrL.gfs +54 -0
  148. pyogrio/gdal_data/netcdf_config.xsd +143 -0
  149. pyogrio/gdal_data/nitf_spec.xml +3306 -0
  150. pyogrio/gdal_data/nitf_spec.xsd +189 -0
  151. pyogrio/gdal_data/ogrinfo_output.schema.json +505 -0
  152. pyogrio/gdal_data/ogrvrt.xsd +543 -0
  153. pyogrio/gdal_data/osmconf.ini +132 -0
  154. pyogrio/gdal_data/ozi_datum.csv +131 -0
  155. pyogrio/gdal_data/ozi_ellips.csv +35 -0
  156. pyogrio/gdal_data/pci_datum.txt +463 -0
  157. pyogrio/gdal_data/pci_ellips.txt +77 -0
  158. pyogrio/gdal_data/pdfcomposition.xsd +721 -0
  159. pyogrio/gdal_data/pds4_template.xml +65 -0
  160. pyogrio/gdal_data/plscenesconf.json +1985 -0
  161. pyogrio/gdal_data/ruian_vf_ob_v1.gfs +1455 -0
  162. pyogrio/gdal_data/ruian_vf_st_uvoh_v1.gfs +86 -0
  163. pyogrio/gdal_data/ruian_vf_st_v1.gfs +1489 -0
  164. pyogrio/gdal_data/ruian_vf_v1.gfs +2126 -0
  165. pyogrio/gdal_data/s57agencies.csv +249 -0
  166. pyogrio/gdal_data/s57attributes.csv +484 -0
  167. pyogrio/gdal_data/s57expectedinput.csv +1008 -0
  168. pyogrio/gdal_data/s57objectclasses.csv +287 -0
  169. pyogrio/gdal_data/seed_2d.dgn +0 -0
  170. pyogrio/gdal_data/seed_3d.dgn +0 -0
  171. pyogrio/gdal_data/stateplane.csv +259 -0
  172. pyogrio/gdal_data/template_tiles.mapml +28 -0
  173. pyogrio/gdal_data/tms_LINZAntarticaMapTileGrid.json +190 -0
  174. pyogrio/gdal_data/tms_MapML_APSTILE.json +268 -0
  175. pyogrio/gdal_data/tms_MapML_CBMTILE.json +346 -0
  176. pyogrio/gdal_data/tms_NZTM2000.json +243 -0
  177. pyogrio/gdal_data/trailer.dxf +434 -0
  178. pyogrio/gdal_data/usage +4 -0
  179. pyogrio/gdal_data/vcpkg-cmake-wrapper.cmake +23 -0
  180. pyogrio/gdal_data/vcpkg.spdx.json +264 -0
  181. pyogrio/gdal_data/vcpkg_abi_info.txt +41 -0
  182. pyogrio/gdal_data/vdv452.xml +367 -0
  183. pyogrio/gdal_data/vdv452.xsd +63 -0
  184. pyogrio/gdal_data/vicar.json +164 -0
  185. pyogrio/geopandas.py +675 -0
  186. pyogrio/proj_data/CH +22 -0
  187. pyogrio/proj_data/GL27 +23 -0
  188. pyogrio/proj_data/ITRF2000 +24 -0
  189. pyogrio/proj_data/ITRF2008 +94 -0
  190. pyogrio/proj_data/ITRF2014 +55 -0
  191. pyogrio/proj_data/copyright +34 -0
  192. pyogrio/proj_data/deformation_model.schema.json +582 -0
  193. pyogrio/proj_data/nad.lst +142 -0
  194. pyogrio/proj_data/nad27 +810 -0
  195. pyogrio/proj_data/nad83 +745 -0
  196. pyogrio/proj_data/other.extra +53 -0
  197. pyogrio/proj_data/proj-config-version.cmake +44 -0
  198. pyogrio/proj_data/proj-config.cmake +79 -0
  199. pyogrio/proj_data/proj-targets-release.cmake +19 -0
  200. pyogrio/proj_data/proj-targets.cmake +107 -0
  201. pyogrio/proj_data/proj.db +0 -0
  202. pyogrio/proj_data/proj.ini +51 -0
  203. pyogrio/proj_data/proj4-targets-release.cmake +19 -0
  204. pyogrio/proj_data/proj4-targets.cmake +107 -0
  205. pyogrio/proj_data/projjson.schema.json +1174 -0
  206. pyogrio/proj_data/triangulation.schema.json +214 -0
  207. pyogrio/proj_data/usage +4 -0
  208. pyogrio/proj_data/vcpkg.spdx.json +198 -0
  209. pyogrio/proj_data/vcpkg_abi_info.txt +27 -0
  210. pyogrio/proj_data/world +214 -0
  211. pyogrio/raw.py +871 -0
  212. pyogrio/tests/__init__.py +0 -0
  213. pyogrio/tests/conftest.py +204 -0
  214. pyogrio/tests/fixtures/README.md +89 -0
  215. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.cpg +1 -0
  216. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf +0 -0
  217. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.prj +1 -0
  218. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp +0 -0
  219. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx +0 -0
  220. pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
  221. pyogrio/tests/fixtures/sample.osm.pbf +0 -0
  222. pyogrio/tests/fixtures/test_datetime.geojson +7 -0
  223. pyogrio/tests/fixtures/test_datetime_tz.geojson +8 -0
  224. pyogrio/tests/fixtures/test_fgdb.gdb.zip +0 -0
  225. pyogrio/tests/fixtures/test_gpkg_nulls.gpkg +0 -0
  226. pyogrio/tests/fixtures/test_multisurface.gpkg +0 -0
  227. pyogrio/tests/fixtures/test_nested.geojson +18 -0
  228. pyogrio/tests/fixtures/test_ogr_types_list.geojson +12 -0
  229. pyogrio/tests/test_arrow.py +1041 -0
  230. pyogrio/tests/test_core.py +588 -0
  231. pyogrio/tests/test_geopandas_io.py +2174 -0
  232. pyogrio/tests/test_path.py +352 -0
  233. pyogrio/tests/test_raw_io.py +1404 -0
  234. pyogrio/util.py +223 -0
  235. pyogrio-0.9.0.dist-info/LICENSE +21 -0
  236. pyogrio-0.9.0.dist-info/METADATA +100 -0
  237. pyogrio-0.9.0.dist-info/RECORD +239 -0
  238. pyogrio-0.9.0.dist-info/WHEEL +5 -0
  239. pyogrio-0.9.0.dist-info/top_level.txt +1 -0
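
The bulk of the diff below is the new test suite pyogrio/tests/test_geopandas_io.py. For orientation, here is a minimal usage sketch of the geopandas-level API those tests exercise; read_dataframe and write_dataframe are pyogrio's public entry points, but the file names in this sketch are hypothetical:

    import pyogrio  # assumes geopandas is installed, as the tests below do

    # Read a column subset with an attribute filter (an OGR SQL "where" clause).
    # Note: for shapefiles, a column referenced in `where` should also be listed
    # in `columns`, otherwise GDAL returns no features (see the tests below).
    df = pyogrio.read_dataframe(
        "countries.shp", columns=["name", "pop_est"], where="pop_est > 10000000"
    )

    # Write the result to another format; the driver is inferred from the suffix.
    pyogrio.write_dataframe(df, "big_countries.gpkg", layer="big_countries")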
pyogrio/tests/test_geopandas_io.py
@@ -0,0 +1,2174 @@
+ import contextlib
+ from datetime import datetime
+ from io import BytesIO
+ import locale
+
+ import numpy as np
+ import pytest
+
+ from pyogrio import list_layers, list_drivers, read_info, __gdal_version__
+ from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
+ from pyogrio.geopandas import read_dataframe, write_dataframe, PANDAS_GE_20
+ from pyogrio.raw import (
+     DRIVERS_NO_MIXED_DIMENSIONS,
+     DRIVERS_NO_MIXED_SINGLE_MULTI,
+ )
+ from pyogrio.tests.conftest import (
+     ALL_EXTS,
+     DRIVERS,
+     requires_pyarrow_api,
+     requires_arrow_write_api,
+     requires_gdal_geos,
+ )
+ from pyogrio._compat import PANDAS_GE_15, HAS_ARROW_WRITE_API
+
+ try:
+     import pandas as pd
+     from pandas.testing import (
+         assert_frame_equal,
+         assert_index_equal,
+         assert_series_equal,
+     )
+
+     import geopandas as gp
+     from geopandas.array import from_wkt
+     from geopandas.testing import assert_geodataframe_equal
+
+     import shapely  # if geopandas is present, shapely is expected to be present
+     from shapely.geometry import Point
+
+ except ImportError:
+     pass
+
+
+ pytest.importorskip("geopandas")
+
+
+ @pytest.fixture(
+     scope="session",
+     params=[
+         False,
+         pytest.param(True, marks=requires_pyarrow_api),
+     ],
+ )
+ def use_arrow(request):
+     return request.param
+
+
+ @pytest.fixture(autouse=True)
+ def skip_if_no_arrow_write_api(request):
+     # automatically skip tests with use_arrow=True and that require Arrow write
+     # API (marked with `@pytest.mark.requires_arrow_write_api`) if it is not available
+     use_arrow = (
+         request.getfixturevalue("use_arrow")
+         if "use_arrow" in request.fixturenames
+         else False
+     )
+     if (
+         use_arrow
+         and not HAS_ARROW_WRITE_API
+         and request.node.get_closest_marker("requires_arrow_write_api")
+     ):
+         pytest.skip("GDAL>=3.8 required for Arrow write API")
+
+
+ def spatialite_available(path):
+     try:
+         _ = read_dataframe(
+             path, sql="select spatialite_version();", sql_dialect="SQLITE"
+         )
+         return True
+     except Exception:
+         return False
+
+
+ @pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
+ def test_read_csv_encoding(tmp_path, encoding):
+     # Write csv test file. Depending on the os this will be written in a different
+     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
+     csv_path = tmp_path / "test.csv"
+     with open(csv_path, "w", encoding=encoding) as csv:
+         csv.write("näme,city\n")
+         csv.write("Wilhelm Röntgen,Zürich\n")
+
+     # Read csv. The data should be read with the same default encoding as the csv file
+     # was written in, but should have been converted to utf-8 in the dataframe returned.
+     # Hence, the asserts below, with strings in utf-8, should be OK.
+     df = read_dataframe(csv_path, encoding=encoding)
+
+     assert len(df) == 1
+     assert df.columns.tolist() == ["näme", "city"]
+     assert df.city.tolist() == ["Zürich"]
+     assert df.näme.tolist() == ["Wilhelm Röntgen"]
+
+
+ @pytest.mark.skipif(
+     locale.getpreferredencoding().upper() == "UTF-8",
+     reason="test requires non-UTF-8 default platform",
+ )
+ def test_read_csv_platform_encoding(tmp_path):
+     """verify that read defaults to platform encoding; only works on Windows (CP1252)"""
+     csv_path = tmp_path / "test.csv"
+     with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
+         csv.write("näme,city\n")
+         csv.write("Wilhelm Röntgen,Zürich\n")
+
+     df = read_dataframe(csv_path)
+
+     assert len(df) == 1
+     assert df.columns.tolist() == ["näme", "city"]
+     assert df.city.tolist() == ["Zürich"]
+     assert df.näme.tolist() == ["Wilhelm Röntgen"]
+
+
+ def test_read_dataframe(naturalearth_lowres_all_ext):
+     df = read_dataframe(naturalearth_lowres_all_ext)
+
+     assert df.crs == "EPSG:4326"
+     assert len(df) == 177
+     assert df.columns.tolist() == [
+         "pop_est",
+         "continent",
+         "name",
+         "iso_a3",
+         "gdp_md_est",
+         "geometry",
+     ]
+
+
+ def test_read_dataframe_vsi(naturalearth_lowres_vsi, use_arrow):
+     df = read_dataframe(naturalearth_lowres_vsi[1], use_arrow=use_arrow)
+     assert len(df) == 177
+
+
+ @pytest.mark.parametrize(
+     "columns, fid_as_index, exp_len", [(None, False, 2), ([], True, 2), ([], False, 0)]
+ )
+ def test_read_layer_without_geometry(
+     test_fgdb_vsi, columns, fid_as_index, use_arrow, exp_len
+ ):
+     result = read_dataframe(
+         test_fgdb_vsi,
+         layer="basetable",
+         columns=columns,
+         fid_as_index=fid_as_index,
+         use_arrow=use_arrow,
+     )
+     assert type(result) is pd.DataFrame
+     assert len(result) == exp_len
+
+
+ @pytest.mark.parametrize(
+     "naturalearth_lowres, expected_ext",
+     [(".gpkg", ".gpkg"), (".shp", ".shp")],
+     indirect=["naturalearth_lowres"],
+ )
+ def test_fixture_naturalearth_lowres(naturalearth_lowres, expected_ext):
+     # Test the fixture with "indirect" parameter
+     assert naturalearth_lowres.suffix == expected_ext
+     df = read_dataframe(naturalearth_lowres)
+     assert len(df) == 177
+
+
+ def test_read_no_geometry(naturalearth_lowres_all_ext, use_arrow):
+     df = read_dataframe(
+         naturalearth_lowres_all_ext, use_arrow=use_arrow, read_geometry=False
+     )
+     assert isinstance(df, pd.DataFrame)
+     assert not isinstance(df, gp.GeoDataFrame)
+
+
+ def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres, use_arrow):
+     with pytest.raises(
+         ValueError,
+         match=(
+             "at least one of read_geometry or return_fids must be True or columns must "
+             "be None or non-empty"
+         ),
+     ):
+         _ = read_dataframe(
+             naturalearth_lowres,
+             columns=[],
+             read_geometry=False,
+             fid_as_index=False,
+             use_arrow=use_arrow,
+         )
+
+
+ def test_read_force_2d(test_fgdb_vsi, use_arrow):
+     with pytest.warns(
+         UserWarning, match=r"Measured \(M\) geometry types are not supported"
+     ):
+         df = read_dataframe(test_fgdb_vsi, layer="test_lines", max_features=1)
+         assert df.iloc[0].geometry.has_z
+
+         df = read_dataframe(
+             test_fgdb_vsi,
+             layer="test_lines",
+             force_2d=True,
+             max_features=1,
+             use_arrow=use_arrow,
+         )
+         assert not df.iloc[0].geometry.has_z
+
+
+ @pytest.mark.filterwarnings("ignore: Measured")
+ @pytest.mark.filterwarnings("ignore: More than one layer found in")
+ def test_read_layer(test_fgdb_vsi, use_arrow):
+     layers = list_layers(test_fgdb_vsi)
+     kwargs = {"use_arrow": use_arrow, "read_geometry": False, "max_features": 1}
+
+     # The first layer is read by default (NOTE: first layer has no features)
+     df = read_dataframe(test_fgdb_vsi, **kwargs)
+     df2 = read_dataframe(test_fgdb_vsi, layer=layers[0][0], **kwargs)
+     assert_frame_equal(df, df2)
+
+     # Reading a specific layer should return that layer.
+     # Detected here by a known column.
+     df = read_dataframe(test_fgdb_vsi, layer="test_lines", **kwargs)
+     assert "RIVER_MILE" in df.columns
+
+
+ def test_read_layer_invalid(naturalearth_lowres_all_ext, use_arrow):
+     with pytest.raises(DataLayerError, match="Layer 'wrong' could not be opened"):
+         read_dataframe(naturalearth_lowres_all_ext, layer="wrong", use_arrow=use_arrow)
+
+
+ @pytest.mark.filterwarnings("ignore: Measured")
+ def test_read_datetime(test_fgdb_vsi, use_arrow):
+     df = read_dataframe(
+         test_fgdb_vsi, layer="test_lines", use_arrow=use_arrow, max_features=1
+     )
+     if PANDAS_GE_20:
+         # starting with pandas 2.0, it preserves the passed datetime resolution
+         assert df.SURVEY_DAT.dtype.name == "datetime64[ms]"
+     else:
+         assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
+
+
+ @pytest.mark.filterwarnings("ignore: Non-conformant content for record 1 in column ")
+ @pytest.mark.requires_arrow_write_api
+ def test_read_datetime_tz(test_datetime_tz, tmp_path, use_arrow):
+     df = read_dataframe(test_datetime_tz)
+     # Make the index non-consecutive to test this case as well. Added for issue
+     # https://github.com/geopandas/pyogrio/issues/324
+     df = df.set_index(np.array([0, 2]))
+     raw_expected = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
+
+     if PANDAS_GE_20:
+         expected = pd.to_datetime(raw_expected, format="ISO8601").as_unit("ms")
+     else:
+         expected = pd.to_datetime(raw_expected)
+     expected = pd.Series(expected, name="datetime_col")
+     assert_series_equal(df.datetime_col, expected, check_index=False)
+     # test write and read round trips
+     fpath = tmp_path / "test.gpkg"
+     write_dataframe(df, fpath, use_arrow=use_arrow)
+     df_read = read_dataframe(fpath, use_arrow=use_arrow)
+     if use_arrow:
+         # with Arrow, the datetimes are always read as UTC
+         expected = expected.dt.tz_convert("UTC")
+     assert_series_equal(df_read.datetime_col, expected)
+
+
+ @pytest.mark.filterwarnings(
+     "ignore: Non-conformant content for record 1 in column dates"
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_write_datetime_mixed_offset(tmp_path, use_arrow):
+     # Australian Summer Time AEDT (GMT+11), Standard Time AEST (GMT+10)
+     dates = ["2023-01-01 11:00:01.111", "2023-06-01 10:00:01.111"]
+     naive_col = pd.Series(pd.to_datetime(dates), name="dates")
+     localised_col = naive_col.dt.tz_localize("Australia/Sydney")
+     utc_col = localised_col.dt.tz_convert("UTC")
+     if PANDAS_GE_20:
+         utc_col = utc_col.dt.as_unit("ms")
+
+     df = gp.GeoDataFrame(
+         {"dates": localised_col, "geometry": [Point(1, 1), Point(1, 1)]},
+         crs="EPSG:4326",
+     )
+     fpath = tmp_path / "test.gpkg"
+     write_dataframe(df, fpath, use_arrow=use_arrow)
+     result = read_dataframe(fpath, use_arrow=use_arrow)
+     # GDAL tz only encodes offsets, not timezones
+     # check multiple offsets are read as utc datetime instead of string values
+     assert_series_equal(result["dates"], utc_col)
+
+
+ @pytest.mark.filterwarnings(
+     "ignore: Non-conformant content for record 1 in column dates"
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_read_write_datetime_tz_with_nulls(tmp_path, use_arrow):
+     dates_raw = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00", pd.NaT]
+     if PANDAS_GE_20:
+         dates = pd.to_datetime(dates_raw, format="ISO8601").as_unit("ms")
+     else:
+         dates = pd.to_datetime(dates_raw)
+     df = gp.GeoDataFrame(
+         {"dates": dates, "geometry": [Point(1, 1), Point(1, 1), Point(1, 1)]},
+         crs="EPSG:4326",
+     )
+     fpath = tmp_path / "test.gpkg"
+     write_dataframe(df, fpath, use_arrow=use_arrow)
+     result = read_dataframe(fpath, use_arrow=use_arrow)
+     if use_arrow:
+         # with Arrow, the datetimes are always read as UTC
+         df["dates"] = df["dates"].dt.tz_convert("UTC")
+     assert_geodataframe_equal(df, result)
+
+
+ def test_read_null_values(test_fgdb_vsi, use_arrow):
+     df = read_dataframe(
+         test_fgdb_vsi, layer="basetable_2", use_arrow=use_arrow, read_geometry=False
+     )
+
+     # make sure that Null values are preserved
+     assert df.SEGMENT_NAME.isnull().max()
+     assert df.loc[df.SEGMENT_NAME.isnull()].SEGMENT_NAME.iloc[0] is None
+
+
+ def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow):
+     kwargs = {"use_arrow": use_arrow, "skip_features": 2, "max_features": 2}
+
+     # default is to not set FIDs as index
+     df = read_dataframe(naturalearth_lowres_all_ext, **kwargs)
+     assert_index_equal(df.index, pd.RangeIndex(0, 2))
+
+     df = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=False, **kwargs)
+     assert_index_equal(df.index, pd.RangeIndex(0, 2))
+
+     df = read_dataframe(
+         naturalearth_lowres_all_ext,
+         fid_as_index=True,
+         **kwargs,
+     )
+     if naturalearth_lowres_all_ext.suffix in [".gpkg"]:
+         # File format where fid starts at 1
+         assert_index_equal(df.index, pd.Index([3, 4], name="fid"))
+     else:
+         # File format where fid starts at 0
+         assert_index_equal(df.index, pd.Index([2, 3], name="fid"))
+
+
+ def test_read_fid_as_index_only(naturalearth_lowres, use_arrow):
+     df = read_dataframe(
+         naturalearth_lowres,
+         columns=[],
+         read_geometry=False,
+         fid_as_index=True,
+         use_arrow=use_arrow,
+     )
+     assert df is not None
+     assert len(df) == 177
+     assert len(df.columns) == 0
+
+
+ def test_read_where(naturalearth_lowres_all_ext, use_arrow):
+     # empty filter should return full set of records
+     df = read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, where="")
+     assert len(df) == 177
+
+     # should return singular item
+     df = read_dataframe(
+         naturalearth_lowres_all_ext, use_arrow=use_arrow, where="iso_a3 = 'CAN'"
+     )
+     assert len(df) == 1
+     assert df.iloc[0].iso_a3 == "CAN"
+
+     df = read_dataframe(
+         naturalearth_lowres_all_ext,
+         use_arrow=use_arrow,
+         where="iso_a3 IN ('CAN', 'USA', 'MEX')",
+     )
+     assert len(df) == 3
+     assert len(set(df.iso_a3.unique()).difference(["CAN", "USA", "MEX"])) == 0
+
+     # should return items within range
+     df = read_dataframe(
+         naturalearth_lowres_all_ext,
+         use_arrow=use_arrow,
+         where="POP_EST >= 10000000 AND POP_EST < 100000000",
+     )
+     assert len(df) == 75
+     assert df.pop_est.min() >= 10000000
+     assert df.pop_est.max() < 100000000
+
+     # should match no items
+     df = read_dataframe(
+         naturalearth_lowres_all_ext, use_arrow=use_arrow, where="ISO_A3 = 'INVALID'"
+     )
+     assert len(df) == 0
+
+
+ def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
+     if use_arrow and naturalearth_lowres_all_ext.suffix == ".gpkg":
+         # https://github.com/OSGeo/gdal/issues/8492
+         request.node.add_marker(pytest.mark.xfail(reason="GDAL doesn't error for GPKG"))
+     with pytest.raises(ValueError, match="Invalid SQL"):
+         read_dataframe(
+             naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+         )
+
+
+ def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
+     # column included in where is not also included in list of columns, which means
+     # GDAL will return no features
+     # NOTE: this behavior is inconsistent across drivers so only shapefiles are
+     # tested for this
+     df = read_dataframe(
+         naturalearth_lowres,
+         where=""" "iso_a3" = 'CAN' """,
+         columns=["name"],
+         use_arrow=use_arrow,
+     )
+
+     assert len(df) == 0
+
+
+ @pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
+ def test_read_bbox_invalid(naturalearth_lowres_all_ext, bbox, use_arrow):
+     with pytest.raises(ValueError, match="Invalid bbox"):
+         read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, bbox=bbox)
+
+
+ @pytest.mark.parametrize(
+     "bbox,expected",
+     [
+         ((0, 0, 0.00001, 0.00001), []),
+         ((-85, 8, -80, 10), ["PAN", "CRI"]),
+         ((-104, 54, -105, 55), ["CAN"]),
+     ],
+ )
+ def test_read_bbox(naturalearth_lowres_all_ext, use_arrow, bbox, expected):
+     if (
+         use_arrow
+         and __gdal_version__ < (3, 8, 0)
+         and naturalearth_lowres_all_ext.suffix == ".gpkg"
+     ):
+         pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")
+
+     df = read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, bbox=bbox)
+
+     assert np.array_equal(df.iso_a3, expected)
+
+
+ def test_read_bbox_sql(naturalearth_lowres_all_ext, use_arrow):
+     df = read_dataframe(
+         naturalearth_lowres_all_ext,
+         use_arrow=use_arrow,
+         bbox=(-180, 50, -100, 90),
+         sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
+     )
+     assert len(df) == 1
+     assert np.array_equal(df.iso_a3, ["CAN"])
+
+
+ def test_read_bbox_where(naturalearth_lowres_all_ext, use_arrow):
+     df = read_dataframe(
+         naturalearth_lowres_all_ext,
+         use_arrow=use_arrow,
+         bbox=(-180, 50, -100, 90),
+         where="iso_a3 not in ('USA', 'RUS')",
+     )
+     assert len(df) == 1
+     assert np.array_equal(df.iso_a3, ["CAN"])
+
+
+ @pytest.mark.parametrize(
+     "mask",
+     [
+         {"type": "Point", "coordinates": [0, 0]},
+         '{"type": "Point", "coordinates": [0, 0]}',
+         "invalid",
+     ],
+ )
+ def test_read_mask_invalid(naturalearth_lowres, use_arrow, mask):
+     with pytest.raises(ValueError, match="'mask' parameter must be a Shapely geometry"):
+         read_dataframe(naturalearth_lowres, use_arrow=use_arrow, mask=mask)
+
+
+ def test_read_bbox_mask_invalid(naturalearth_lowres, use_arrow):
+     with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
+         read_dataframe(
+             naturalearth_lowres,
+             use_arrow=use_arrow,
+             bbox=(-85, 8, -80, 10),
+             mask=shapely.Point(-105, 55),
+         )
+
+
+ @pytest.mark.parametrize(
+     "mask,expected",
+     [
+         (shapely.Point(-105, 55), ["CAN"]),
+         (shapely.box(-85, 8, -80, 10), ["PAN", "CRI"]),
+         (
+             shapely.Polygon(
+                 (
+                     [6.101929483362767, 50.97085041206964],
+                     [5.773001596839322, 50.90661120482673],
+                     [5.593156133704326, 50.642648747710325],
+                     [6.059271089606312, 50.686051894002475],
+                     [6.374064065737485, 50.851481340346965],
+                     [6.101929483362767, 50.97085041206964],
+                 )
+             ),
+             ["DEU", "BEL", "NLD"],
+         ),
+         (
+             shapely.GeometryCollection(
+                 [shapely.Point(-7.7, 53), shapely.box(-85, 8, -80, 10)]
+             ),
+             ["PAN", "CRI", "IRL"],
+         ),
+     ],
+ )
+ def test_read_mask(
+     naturalearth_lowres_all_ext,
+     use_arrow,
+     mask,
+     expected,
+ ):
+     if (
+         use_arrow
+         and __gdal_version__ < (3, 8, 0)
+         and naturalearth_lowres_all_ext.suffix == ".gpkg"
+     ):
+         pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")
+
+     df = read_dataframe(naturalearth_lowres_all_ext, use_arrow=use_arrow, mask=mask)
+
+     assert len(df) == len(expected)
+     assert np.array_equal(df.iso_a3, expected)
+
+
+ def test_read_mask_sql(naturalearth_lowres_all_ext, use_arrow):
+     df = read_dataframe(
+         naturalearth_lowres_all_ext,
+         use_arrow=use_arrow,
+         mask=shapely.box(-180, 50, -100, 90),
+         sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
+     )
+     assert len(df) == 1
+     assert np.array_equal(df.iso_a3, ["CAN"])
+
+
+ def test_read_mask_where(naturalearth_lowres_all_ext, use_arrow):
+     df = read_dataframe(
+         naturalearth_lowres_all_ext,
+         use_arrow=use_arrow,
+         mask=shapely.box(-180, 50, -100, 90),
+         where="iso_a3 not in ('USA', 'RUS')",
+     )
+     assert len(df) == 1
+     assert np.array_equal(df.iso_a3, ["CAN"])
+
+
+ @pytest.mark.parametrize("fids", [[1, 5, 10], np.array([1, 5, 10], dtype=np.int64)])
+ def test_read_fids(naturalearth_lowres_all_ext, fids, use_arrow):
+     # ensure keyword is properly passed through
+     df = read_dataframe(
+         naturalearth_lowres_all_ext, fids=fids, fid_as_index=True, use_arrow=use_arrow
+     )
+     assert len(df) == 3
+     assert np.array_equal(fids, df.index.values)
+
+
+ @requires_pyarrow_api
+ def test_read_fids_arrow_max_exception(naturalearth_lowres):
+     # The maximum number at the time of writing is 4997 for "OGRSQL". For SQLite-based
+     # formats like Geopackage, there is no limit.
+     nb_fids = 4998
+     fids = range(nb_fids)
+     with pytest.raises(ValueError, match=f"error applying filter for {nb_fids} fids"):
+         _ = read_dataframe(naturalearth_lowres, fids=fids, use_arrow=True)
+
+
+ @requires_pyarrow_api
+ @pytest.mark.skipif(
+     __gdal_version__ >= (3, 8, 0), reason="GDAL >= 3.8.0 does not need to warn"
+ )
+ def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext):
+     # A warning should be given for old GDAL versions, except for some file formats.
+     if naturalearth_lowres_all_ext.suffix not in [".gpkg", ".geojson"]:
+         handler = pytest.warns(
+             UserWarning,
+             match="Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow",
+         )
+     else:
+         handler = contextlib.nullcontext()
+
+     with handler:
+         df = read_dataframe(naturalearth_lowres_all_ext, fids=[22], use_arrow=True)
+         assert len(df) == 1
+
+
+ def test_read_fids_force_2d(test_fgdb_vsi):
+     with pytest.warns(
+         UserWarning, match=r"Measured \(M\) geometry types are not supported"
+     ):
+         df = read_dataframe(test_fgdb_vsi, layer="test_lines", fids=[22])
+         assert len(df) == 1
+         assert df.iloc[0].geometry.has_z
+
+         df = read_dataframe(test_fgdb_vsi, layer="test_lines", force_2d=True, fids=[22])
+         assert len(df) == 1
+         assert not df.iloc[0].geometry.has_z
+
+
+ @pytest.mark.parametrize("skip_features", [10, 200])
+ def test_read_skip_features(naturalearth_lowres_all_ext, use_arrow, skip_features):
+     ext = naturalearth_lowres_all_ext.suffix
+     expected = (
+         read_dataframe(naturalearth_lowres_all_ext)
+         .iloc[skip_features:]
+         .reset_index(drop=True)
+     )
+
+     df = read_dataframe(
+         naturalearth_lowres_all_ext, skip_features=skip_features, use_arrow=use_arrow
+     )
+     assert len(df) == len(expected)
+
+     # Coordinates are not precisely equal when written to JSON
+     # dtypes do not necessarily round-trip precisely through JSON
+     is_json = ext in [".geojson", ".geojsonl"]
+     # In .geojsonl the vertices are reordered, so normalize
+     is_jsons = ext == ".geojsonl"
+
+     assert_geodataframe_equal(
+         df,
+         expected,
+         check_less_precise=is_json,
+         check_index_type=False,
+         check_dtype=not is_json,
+         normalize=is_jsons,
+     )
+
+
+ def test_read_negative_skip_features(naturalearth_lowres, use_arrow):
+     with pytest.raises(ValueError, match="'skip_features' must be >= 0"):
+         read_dataframe(naturalearth_lowres, skip_features=-1, use_arrow=use_arrow)
+
+
+ @pytest.mark.parametrize("max_features", [10, 100])
+ def test_read_max_features(naturalearth_lowres_all_ext, use_arrow, max_features):
+     ext = naturalearth_lowres_all_ext.suffix
+     expected = read_dataframe(naturalearth_lowres_all_ext).iloc[:max_features]
+     df = read_dataframe(
+         naturalearth_lowres_all_ext, max_features=max_features, use_arrow=use_arrow
+     )
+
+     assert len(df) == len(expected)
+
+     # Coordinates are not precisely equal when written to JSON
+     # dtypes do not necessarily round-trip precisely through JSON
+     is_json = ext in [".geojson", ".geojsonl"]
+     # In .geojsonl the vertices are reordered, so normalize
+     is_jsons = ext == ".geojsonl"
+
+     assert_geodataframe_equal(
+         df,
+         expected,
+         check_less_precise=is_json,
+         check_index_type=False,
+         check_dtype=not is_json,
+         normalize=is_jsons,
+     )
+
+
+ def test_read_negative_max_features(naturalearth_lowres, use_arrow):
+     with pytest.raises(ValueError, match="'max_features' must be >= 0"):
+         read_dataframe(naturalearth_lowres, max_features=-1, use_arrow=use_arrow)
+
+
+ def test_read_non_existent_file(use_arrow):
+     # ensure consistent error type / message from GDAL
+     with pytest.raises(DataSourceError, match="No such file or directory"):
+         read_dataframe("non-existent.shp", use_arrow=use_arrow)
+
+     with pytest.raises(DataSourceError, match="does not exist in the file system"):
+         read_dataframe("/vsizip/non-existent.zip", use_arrow=use_arrow)
+
+     with pytest.raises(DataSourceError, match="does not exist in the file system"):
+         read_dataframe("zip:///non-existent.zip", use_arrow=use_arrow)
+
+
+ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
+     # The geometry column cannot be specified when using the
+     # default OGRSQL dialect but is returned nonetheless, so 4 columns.
+     sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
+     df = read_dataframe(
+         naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+     )
+     assert len(df.columns) == 4
+     assert len(df) == 177
+
+     # Should return single row
+     sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
+     df = read_dataframe(
+         naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+     )
+     assert len(df) == 1
+     assert len(df.columns) == 6
+     assert df.iloc[0].iso_a3 == "CAN"
+
+     sql = """SELECT *
+              FROM naturalearth_lowres
+              WHERE iso_a3 IN ('CAN', 'USA', 'MEX')"""
+     df = read_dataframe(
+         naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+     )
+     assert len(df.columns) == 6
+     assert len(df) == 3
+     assert df.iso_a3.tolist() == ["CAN", "USA", "MEX"]
+
+     sql = """SELECT *
+              FROM naturalearth_lowres
+              WHERE iso_a3 IN ('CAN', 'USA', 'MEX')
+              ORDER BY name"""
+     df = read_dataframe(
+         naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+     )
+     assert len(df.columns) == 6
+     assert len(df) == 3
+     assert df.iso_a3.tolist() == ["CAN", "MEX", "USA"]
+
+     # Should return items within range.
+     sql = """SELECT *
+              FROM naturalearth_lowres
+              WHERE POP_EST >= 10000000 AND POP_EST < 100000000"""
+     df = read_dataframe(
+         naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+     )
+     assert len(df) == 75
+     assert len(df.columns) == 6
+     assert df.pop_est.min() >= 10000000
+     assert df.pop_est.max() < 100000000
+
+     # Should match no items.
+     sql = "SELECT * FROM naturalearth_lowres WHERE ISO_A3 = 'INVALID'"
+     df = read_dataframe(
+         naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
+     )
+     assert len(df) == 0
+
+
+ def test_read_sql_invalid(naturalearth_lowres_all_ext, use_arrow):
+     if naturalearth_lowres_all_ext.suffix == ".gpkg":
+         with pytest.raises(Exception, match="In ExecuteSQL().*"):
+             read_dataframe(
+                 naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
+             )
+     else:
+         with pytest.raises(Exception, match="SQL Expression Parsing Error"):
+             read_dataframe(
+                 naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
+             )
+
+     with pytest.raises(
+         ValueError, match="'sql' paramater cannot be combined with 'layer'"
+     ):
+         read_dataframe(
+             naturalearth_lowres_all_ext,
+             sql="whatever",
+             layer="invalid",
+             use_arrow=use_arrow,
+         )
+
+
+ def test_read_sql_columns_where(naturalearth_lowres_all_ext, use_arrow):
+     sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
+     df = read_dataframe(
+         naturalearth_lowres_all_ext,
+         sql=sql,
+         sql_dialect="OGRSQL",
+         columns=["iso_a3_renamed", "name"],
+         where="iso_a3_renamed IN ('CAN', 'USA', 'MEX')",
+         use_arrow=use_arrow,
+     )
+     assert len(df.columns) == 3
+     assert len(df) == 3
+     assert df.iso_a3_renamed.tolist() == ["CAN", "USA", "MEX"]
+
+
+ def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext, use_arrow):
+     sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
+     df = read_dataframe(
+         naturalearth_lowres_all_ext,
+         sql=sql,
+         sql_dialect="OGRSQL",
+         columns=["iso_a3_renamed", "name"],
+         where="iso_a3_renamed IN ('CRI', 'PAN')",
+         bbox=(-85, 8, -80, 10),
+         use_arrow=use_arrow,
+     )
+     assert len(df.columns) == 3
+     assert len(df) == 2
+     assert df.iso_a3_renamed.tolist() == ["PAN", "CRI"]
+
+
+ def test_read_sql_skip_max(naturalearth_lowres_all_ext, use_arrow):
+     sql = """SELECT *
+              FROM naturalearth_lowres
+              WHERE iso_a3 IN ('CAN', 'MEX', 'USA')
+              ORDER BY name"""
+     df = read_dataframe(
+         naturalearth_lowres_all_ext,
+         sql=sql,
+         skip_features=1,
+         max_features=1,
+         sql_dialect="OGRSQL",
+         use_arrow=use_arrow,
+     )
+     assert len(df.columns) == 6
+     assert len(df) == 1
+     assert df.iso_a3.tolist() == ["MEX"]
+
+     sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
+     df = read_dataframe(
+         naturalearth_lowres_all_ext,
+         sql=sql,
+         max_features=3,
+         sql_dialect="OGRSQL",
+         use_arrow=use_arrow,
+     )
+     assert len(df) == 1
+
+     sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
+     df = read_dataframe(
+         naturalearth_lowres_all_ext,
+         sql=sql,
+         sql_dialect="OGRSQL",
+         skip_features=1,
+         use_arrow=use_arrow,
+     )
+     assert len(df) == 0
+
+
+ @requires_gdal_geos
+ @pytest.mark.parametrize(
+     "naturalearth_lowres",
+     [ext for ext in ALL_EXTS if ext != ".gpkg"],
+     indirect=["naturalearth_lowres"],
+ )
+ def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres, use_arrow):
+     # Should return singular item
+     sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
+     df = read_dataframe(
+         naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
+     )
+     assert len(df) == 1
+     assert len(df.columns) == 6
+     assert df.iloc[0].iso_a3 == "CAN"
+     area_canada = df.iloc[0].geometry.area
+
+     # Use spatialite function
+     sql = """SELECT ST_Buffer(geometry, 5) AS geometry, name, pop_est, iso_a3
+              FROM naturalearth_lowres
+              WHERE ISO_A3 = 'CAN'"""
+     df = read_dataframe(
+         naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
+     )
+     assert len(df) == 1
+     assert len(df.columns) == 4
+     assert df.iloc[0].geometry.area > area_canada
+
+
+ @requires_gdal_geos
+ @pytest.mark.parametrize(
+     "naturalearth_lowres", [".gpkg"], indirect=["naturalearth_lowres"]
+ )
+ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
+     # "INDIRECT_SQLITE" prohibits GDAL from passing the SQL statement to sqlite.
+     # Because the statement is processed within GDAL it is possible to use
+     # spatialite functions even if sqlite isn't built with spatialite support.
+     sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
+     df = read_dataframe(
+         naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
+     )
+     assert len(df) == 1
+     assert len(df.columns) == 6
+     assert df.iloc[0].iso_a3 == "CAN"
+     area_canada = df.iloc[0].geometry.area
+
+     # Use spatialite function
+     sql = """SELECT ST_Buffer(geom, 5) AS geometry, name, pop_est, iso_a3
+              FROM naturalearth_lowres
+              WHERE ISO_A3 = 'CAN'"""
+     df = read_dataframe(
+         naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
+     )
+     assert len(df) == 1
+     assert len(df.columns) == 4
+     assert df.iloc[0].geometry.area > area_canada
+
+
+ @pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
+ def test_write_csv_encoding(tmp_path, encoding):
+     """Test if write_dataframe uses the default encoding correctly."""
+     # Write csv test file. Depending on the os this will be written in a different
+     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
+     csv_path = tmp_path / "test.csv"
+
+     with open(csv_path, "w", encoding=encoding) as csv:
+         csv.write("näme,city\n")
+         csv.write("Wilhelm Röntgen,Zürich\n")
+
+     # Write csv test file with the same data using write_dataframe. It should use the
+     # same encoding as above.
+     df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
+     csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
+     write_dataframe(df, csv_pyogrio_path, encoding=encoding)
+
+     # Check if the text files written both ways can be read again and give same result.
+     with open(csv_path, "r", encoding=encoding) as csv:
+         csv_str = csv.read()
+     with open(csv_pyogrio_path, "r", encoding=encoding) as csv_pyogrio:
+         csv_pyogrio_str = csv_pyogrio.read()
+     assert csv_str == csv_pyogrio_str
+
+     # Check if the files are binary identical, to be 100% sure they were written with
+     # the same encoding.
+ with open(csv_path, "rb") as csv:
936
+ csv_bytes = csv.read()
937
+ with open(csv_pyogrio_path, "rb") as csv_pyogrio:
938
+ csv_pyogrio_bytes = csv_pyogrio.read()
939
+ assert csv_bytes == csv_pyogrio_bytes
940
+
941
+
942
+ @pytest.mark.parametrize("ext", ALL_EXTS)
943
+ @pytest.mark.requires_arrow_write_api
944
+ def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
945
+ input_gdf = read_dataframe(naturalearth_lowres)
946
+ output_path = tmp_path / f"test{ext}"
947
+
948
+ if ext == ".fgb":
949
+ # For .fgb, spatial_index=False to avoid the rows being reordered
950
+ write_dataframe(
951
+ input_gdf, output_path, use_arrow=use_arrow, spatial_index=False
952
+ )
953
+ else:
954
+ write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
955
+
956
+ assert output_path.exists()
957
+ result_gdf = read_dataframe(output_path)
958
+
959
+ geometry_types = result_gdf.geometry.type.unique()
960
+ if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
961
+ assert list(geometry_types) == ["MultiPolygon"]
962
+ else:
963
+ assert set(geometry_types) == set(["MultiPolygon", "Polygon"])
964
+
965
+ # Coordinates are not precisely equal when written to JSON
966
+ # dtypes do not necessarily round-trip precisely through JSON
967
+ is_json = ext in [".geojson", ".geojsonl"]
968
+ # In .geojsonl the vertices are reordered, so normalize
969
+ is_jsons = ext == ".geojsonl"
970
+
971
+ assert_geodataframe_equal(
972
+ result_gdf,
973
+ input_gdf,
974
+ check_less_precise=is_json,
975
+ check_index_type=False,
976
+ check_dtype=not is_json,
977
+ normalize=is_jsons,
978
+ )
979
+
980
+
981
+ @pytest.mark.filterwarnings("ignore:.*No SRS set on layer.*")
982
+ @pytest.mark.parametrize("write_geodf", [True, False])
983
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS + [".xlsx"] if ext != ".fgb"])
984
+ @pytest.mark.requires_arrow_write_api
985
+ def test_write_dataframe_no_geom(
986
+ request, tmp_path, naturalearth_lowres, write_geodf, ext, use_arrow
987
+ ):
988
+ """Test writing a (geo)dataframe without a geometry column.
989
+
990
+ FlatGeobuf (.fgb) doesn't seem to support this, and just writes an empty file.
991
+ """
992
+ # Prepare test data
993
+ input_df = read_dataframe(naturalearth_lowres, read_geometry=False)
994
+ if write_geodf:
995
+ input_df = gp.GeoDataFrame(input_df)
996
+
997
+ output_path = tmp_path / f"test{ext}"
998
+
999
+ # A shapefile without geometry column results in only a .dbf file.
1000
+ if ext == ".shp":
1001
+ output_path = output_path.with_suffix(".dbf")
1002
+
1003
+ # Determine driver
1004
+ driver = DRIVERS[ext] if ext != ".xlsx" else "XLSX"
1005
+
1006
+ write_dataframe(input_df, output_path, use_arrow=use_arrow, driver=driver)
1007
+
1008
+ assert output_path.exists()
1009
+ result_df = read_dataframe(output_path)
1010
+
1011
+ assert isinstance(result_df, pd.DataFrame)
1012
+
1013
+ # some dtypes do not round-trip precisely through these file types
1014
+ check_dtype = ext not in [".geojson", ".geojsonl", ".xlsx"]
1015
+
1016
+ if ext in [".gpkg", ".shp", ".xlsx"]:
1017
+ # These file types return a DataFrame when read.
1018
+ assert not isinstance(result_df, gp.GeoDataFrame)
1019
+ if isinstance(input_df, gp.GeoDataFrame):
1020
+ input_df = pd.DataFrame(input_df)
1021
+
1022
+ pd.testing.assert_frame_equal(
1023
+ result_df, input_df, check_index_type=False, check_dtype=check_dtype
1024
+ )
1025
+ else:
1026
+ # These file types return a GeoDataFrame with None Geometries when read.
1027
+ input_none_geom_gdf = gp.GeoDataFrame(
1028
+ input_df, geometry=np.repeat(None, len(input_df)), crs=4326
1029
+ )
1030
+ assert_geodataframe_equal(
1031
+ result_df,
1032
+ input_none_geom_gdf,
1033
+ check_index_type=False,
1034
+ check_dtype=check_dtype,
1035
+ )
1036
+
1037
+
1038
+ @pytest.mark.requires_arrow_write_api
1039
+ def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):
1040
+ # dataframe writing ignores the index
1041
+ input_gdf = read_dataframe(naturalearth_lowres)
1042
+ input_gdf = input_gdf.set_index("iso_a3")
1043
+
1044
+ output_path = tmp_path / "test.shp"
1045
+ write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
1046
+
1047
+ result_gdf = read_dataframe(output_path)
1048
+ assert isinstance(result_gdf.index, pd.RangeIndex)
1049
+ assert_geodataframe_equal(result_gdf, input_gdf.reset_index(drop=True))
1050
+
1051
+
1052
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
1053
+ @pytest.mark.requires_arrow_write_api
1054
+ def test_write_empty_dataframe(tmp_path, ext, use_arrow):
1055
+ expected = gp.GeoDataFrame(geometry=[], crs=4326)
1056
+
1057
+ filename = tmp_path / f"test{ext}"
1058
+ write_dataframe(expected, filename, use_arrow=use_arrow)
1059
+
1060
+ assert filename.exists()
1061
+ df = read_dataframe(filename)
1062
+ assert_geodataframe_equal(df, expected)
1063
+
1064
+
1065
+ @pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
1066
+ @pytest.mark.requires_arrow_write_api
1067
+ def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
+     # Writing an empty dataframe to .geojsons or .geojsonl logically results in a
+     # 0 byte file, but gdal isn't able to read those again at the time of writing.
+     # Issue logged here: https://github.com/geopandas/pyogrio/issues/94
+     expected = gp.GeoDataFrame(geometry=[], crs=4326)
+
+     filename = tmp_path / f"test{ext}"
+     write_dataframe(expected, filename, use_arrow=use_arrow)
+
+     assert filename.exists()
+     with pytest.raises(
+         Exception, match=".* not recognized as( being in)? a supported file format."
+     ):
+         _ = read_dataframe(filename, use_arrow=use_arrow)
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_gpkg_multiple_layers(tmp_path, naturalearth_lowres, use_arrow):
+     input_gdf = read_dataframe(naturalearth_lowres)
+     filename = tmp_path / "test.gpkg"
+
+     write_dataframe(
+         input_gdf,
+         filename,
+         layer="first",
+         promote_to_multi=True,
+         use_arrow=use_arrow,
+     )
+
+     assert filename.exists()
+     assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])
+
+     write_dataframe(
+         input_gdf,
+         filename,
+         layer="second",
+         promote_to_multi=True,
+         use_arrow=use_arrow,
+     )
+     assert np.array_equal(
+         list_layers(filename),
+         [["first", "MultiPolygon"], ["second", "MultiPolygon"]],
+     )
+
+
+ @pytest.mark.parametrize("ext", ALL_EXTS)
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_append(request, tmp_path, naturalearth_lowres, ext, use_arrow):
+     if ext == ".fgb" and __gdal_version__ <= (3, 5, 0):
+         pytest.skip("Append to FlatGeobuf fails for GDAL <= 3.5.0")
+
+     if ext in (".geojsonl", ".geojsons") and __gdal_version__ <= (3, 6, 0):
+         pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")
+
+     if use_arrow and ext.startswith(".geojson"):
+         # Bug in GDAL when appending int64 to GeoJSON
+         # (https://github.com/OSGeo/gdal/issues/9792)
+         request.node.add_marker(
+             pytest.mark.xfail(reason="Bugs with append when writing Arrow to GeoJSON")
+         )
+
+     input_gdf = read_dataframe(naturalearth_lowres)
+     filename = tmp_path / f"test{ext}"
+
+     write_dataframe(input_gdf, filename, use_arrow=use_arrow)
+
+     assert filename.exists()
+     assert len(read_dataframe(filename)) == 177
+
+     write_dataframe(input_gdf, filename, use_arrow=use_arrow, append=True)
+     assert len(read_dataframe(filename)) == 354
+
+
+ @pytest.mark.parametrize("spatial_index", [False, True])
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_gdal_options(
+     tmp_path, naturalearth_lowres, spatial_index, use_arrow
+ ):
+     df = read_dataframe(naturalearth_lowres)
+
+     outfilename1 = tmp_path / "test1.shp"
+     write_dataframe(
+         df,
+         outfilename1,
+         use_arrow=use_arrow,
+         SPATIAL_INDEX="YES" if spatial_index else "NO",
+     )
+     assert outfilename1.exists() is True
+     index_filename1 = tmp_path / "test1.qix"
+     assert index_filename1.exists() is spatial_index
+
+     # using explicit layer_options instead
+     outfilename2 = tmp_path / "test2.shp"
+     write_dataframe(
+         df,
+         outfilename2,
+         use_arrow=use_arrow,
+         layer_options=dict(spatial_index=spatial_index),
+     )
+     assert outfilename2.exists() is True
+     index_filename2 = tmp_path / "test2.qix"
+     assert index_filename2.exists() is spatial_index
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_gdal_options_unknown(tmp_path, naturalearth_lowres, use_arrow):
+     df = read_dataframe(naturalearth_lowres)
+
+     # geojson has no spatial index, so passing keyword should raise
+     outfilename = tmp_path / "test.geojson"
+     with pytest.raises(ValueError, match="unrecognized option 'SPATIAL_INDEX'"):
+         write_dataframe(df, outfilename, use_arrow=use_arrow, spatial_index=True)
+
+
+ def _get_gpkg_table_names(path):
+     import sqlite3
+
+     con = sqlite3.connect(path)
+     cursor = con.cursor()
+     cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+     result = cursor.fetchall()
+     return [res[0] for res in result]
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres, use_arrow):
+     df = read_dataframe(naturalearth_lowres)
+
+     test_default_filename = tmp_path / "test_default.gpkg"
+     write_dataframe(df, test_default_filename, use_arrow=use_arrow)
+     assert "gpkg_ogr_contents" in _get_gpkg_table_names(test_default_filename)
+
+     test_no_contents_filename = tmp_path / "test_no_contents.gpkg"
+     write_dataframe(
+         df, test_no_contents_filename, use_arrow=use_arrow, ADD_GPKG_OGR_CONTENTS="NO"
+ )
1203
+ assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename)
1204
+
1205
+ test_no_contents_filename2 = tmp_path / "test_no_contents2.gpkg"
1206
+ write_dataframe(
1207
+ df,
1208
+ test_no_contents_filename2,
1209
+ use_arrow=use_arrow,
1210
+ dataset_options=dict(add_gpkg_ogr_contents=False),
1211
+ )
1212
+ assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename2)
1213
+
1214
+
1215
+ @pytest.mark.parametrize(
+     "ext, promote_to_multi, expected_geometry_types, expected_geometry_type",
+     [
+         (".fgb", None, ["MultiPolygon"], "MultiPolygon"),
+         (".fgb", True, ["MultiPolygon"], "MultiPolygon"),
+         (".fgb", False, ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", None, ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", True, ["MultiPolygon"], "MultiPolygon"),
+         (".geojson", False, ["MultiPolygon", "Polygon"], "Unknown"),
+     ],
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_promote_to_multi(
+     tmp_path,
+     naturalearth_lowres,
+     ext,
+     promote_to_multi,
+     expected_geometry_types,
+     expected_geometry_type,
+     use_arrow,
+ ):
+     input_gdf = read_dataframe(naturalearth_lowres)
+
+     output_path = tmp_path / f"test_promote{ext}"
+     write_dataframe(
+         input_gdf, output_path, use_arrow=use_arrow, promote_to_multi=promote_to_multi
+     )
+
+     assert output_path.exists()
+     output_gdf = read_dataframe(output_path)
+     geometry_types = sorted(output_gdf.geometry.type.unique())
+     assert geometry_types == expected_geometry_types
+     assert read_info(output_path)["geometry_type"] == expected_geometry_type
+
+
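A sketch of the promote_to_multi behaviour parametrized above, using an illustrative single Polygon:

    import geopandas as gp
    import shapely
    from pyogrio import read_dataframe, write_dataframe

    gdf = gp.GeoDataFrame(geometry=[shapely.box(0, 0, 1, 1)], crs="EPSG:4326")
    write_dataframe(gdf, "out.geojson", promote_to_multi=True)
    # the Polygon is written as a MultiPolygon
    assert read_dataframe("out.geojson").geometry.type[0] == "MultiPolygon"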
1250
+ @pytest.mark.parametrize(
+     "ext, promote_to_multi, geometry_type, "
+     "expected_geometry_types, expected_geometry_type",
+     [
+         (".fgb", None, "Unknown", ["MultiPolygon"], "Unknown"),
+         (".geojson", False, "Unknown", ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", None, "Unknown", ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", None, "Polygon", ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", None, "MultiPolygon", ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", None, "Point", ["MultiPolygon", "Polygon"], "Unknown"),
+         (".geojson", True, "Unknown", ["MultiPolygon"], "MultiPolygon"),
+         (".gpkg", False, "Unknown", ["MultiPolygon", "Polygon"], "Unknown"),
+         (".gpkg", None, "Unknown", ["MultiPolygon"], "Unknown"),
+         (".gpkg", None, "Polygon", ["MultiPolygon"], "Polygon"),
+         (".gpkg", None, "MultiPolygon", ["MultiPolygon"], "MultiPolygon"),
+         (".gpkg", None, "Point", ["MultiPolygon"], "Point"),
+         (".gpkg", True, "Unknown", ["MultiPolygon"], "Unknown"),
+         (".shp", False, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
+         (".shp", None, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
+         (".shp", None, "Polygon", ["MultiPolygon", "Polygon"], "Polygon"),
+         (".shp", None, "MultiPolygon", ["MultiPolygon", "Polygon"], "Polygon"),
+         (".shp", True, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
+     ],
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_promote_to_multi_layer_geom_type(
+     tmp_path,
+     naturalearth_lowres,
+     ext,
+     promote_to_multi,
+     geometry_type,
+     expected_geometry_types,
+     expected_geometry_type,
+     use_arrow,
+ ):
+     input_gdf = read_dataframe(naturalearth_lowres)
+
+     output_path = tmp_path / f"test_promote_layer_geom_type{ext}"
+
+     if ext == ".gpkg" and geometry_type in ("Polygon", "Point"):
+         ctx = pytest.warns(
+             RuntimeWarning, match="A geometry of type MULTIPOLYGON is inserted"
+         )
+     else:
+         ctx = contextlib.nullcontext()
+
+     with ctx:
+         write_dataframe(
+             input_gdf,
+             output_path,
+             use_arrow=use_arrow,
+             promote_to_multi=promote_to_multi,
+             geometry_type=geometry_type,
+         )
+
+     assert output_path.exists()
+     output_gdf = read_dataframe(output_path)
+     geometry_types = sorted(output_gdf.geometry.type.unique())
+     assert geometry_types == expected_geometry_types
+     assert read_info(output_path)["geometry_type"] == expected_geometry_type
+
+
+ @pytest.mark.parametrize(
+     "ext, promote_to_multi, geometry_type, expected_raises_match",
+     [
+         (".fgb", False, "MultiPolygon", "Mismatched geometry type"),
+         (".fgb", False, "Polygon", "Mismatched geometry type"),
+         (".fgb", None, "Point", "Mismatched geometry type"),
+         (".fgb", None, "Polygon", "Mismatched geometry type"),
+         (
+             ".shp",
+             None,
+             "Point",
+             "Could not add feature to layer at index|Error while writing batch to OGR layer",
+         ),
+     ],
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
+     tmp_path,
+     naturalearth_lowres,
+     ext,
+     promote_to_multi,
+     geometry_type,
+     expected_raises_match,
+     use_arrow,
+ ):
+     input_gdf = read_dataframe(naturalearth_lowres)
+
+     output_path = tmp_path / f"test{ext}"
+     with pytest.raises((FeatureError, DataLayerError), match=expected_raises_match):
+         write_dataframe(
+             input_gdf,
+             output_path,
+             use_arrow=use_arrow,
+             promote_to_multi=promote_to_multi,
+             geometry_type=geometry_type,
+         )
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_layer_geom_type_invalid(
+     tmp_path, naturalearth_lowres, use_arrow
+ ):
+     df = read_dataframe(naturalearth_lowres)
+
+     filename = tmp_path / "test.geojson"
+     with pytest.raises(
+         GeometryError, match="Geometry type is not supported: NotSupported"
+     ):
+         write_dataframe(df, filename, use_arrow=use_arrow, geometry_type="NotSupported")
+
+
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext != ".shp"])
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_truly_mixed(tmp_path, ext, use_arrow):
+     geometry = [
+         shapely.Point(0, 0),
+         shapely.LineString([(0, 0), (1, 1)]),
+         shapely.box(0, 0, 1, 1),
+         shapely.MultiPoint([shapely.Point(1, 1), shapely.Point(2, 2)]),
+         shapely.MultiLineString(
+             [shapely.LineString([(1, 1), (2, 2)]), shapely.LineString([(2, 2), (3, 3)])]
+         ),
+         shapely.MultiPolygon([shapely.box(1, 1, 2, 2), shapely.box(2, 2, 3, 3)]),
+     ]
+
+     df = gp.GeoDataFrame(
+         {"col": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]}, geometry=geometry, crs="EPSG:4326"
+     )
+
+     filename = tmp_path / f"test{ext}"
+
+     if ext == ".fgb":
+         # For .fgb, use spatial_index=False to avoid the rows being reordered
+         write_dataframe(df, filename, use_arrow=use_arrow, spatial_index=False)
+     else:
+         write_dataframe(df, filename, use_arrow=use_arrow)
+
+     # Drivers that support mixed geometries will default to "Unknown" geometry type
+     assert read_info(filename)["geometry_type"] == "Unknown"
+     result = read_dataframe(filename)
+     assert_geodataframe_equal(result, df, check_geom_type=True)
+
+
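Drivers with a generic layer type accept heterogeneous geometries and report "Unknown", as asserted above. A short sketch, assuming the mixed GeoDataFrame `df` built in the test:

    from pyogrio import read_info, write_dataframe

    write_dataframe(df, "mixed.gpkg")  # GPKG supports mixed-geometry layers
    assert read_info("mixed.gpkg")["geometry_type"] == "Unknown"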
1395
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_truly_mixed_invalid(tmp_path, use_arrow):
+     # Shapefile doesn't support generic "Geometry" / "Unknown" type
+     # for mixed geometries
+
+     df = gp.GeoDataFrame(
+         {"col": [1.0, 2.0, 3.0]},
+         geometry=[
+             shapely.Point(0, 0),
+             shapely.LineString([(0, 0), (1, 1)]),
+             shapely.box(0, 0, 1, 1),
+         ],
+         crs="EPSG:4326",
+     )
+
+     # ensure error message from GDAL is included
+     msg = (
+         "Could not add feature to layer at index 1: Attempt to "
+         r"write non-point \(LINESTRING\) geometry to point shapefile."
+         # DataLayerError when using Arrow
+         "|Error while writing batch to OGR layer: Attempt to "
+         r"write non-point \(LINESTRING\) geometry to point shapefile."
+     )
+     with pytest.raises((FeatureError, DataLayerError), match=msg):
+         write_dataframe(df, tmp_path / "test.shp", use_arrow=use_arrow)
+
+
+ @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext != ".fgb"])
+ @pytest.mark.parametrize(
+     "geoms",
+     [
+         [None, shapely.Point(1, 1)],
+         [shapely.Point(1, 1), None],
+         [None, shapely.Point(1, 1, 2)],
+         [None, None],
+     ],
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext, use_arrow):
+     filename = tmp_path / f"test{ext}"
+
+     df = gp.GeoDataFrame({"col": [1.0, 2.0]}, geometry=geoms, crs="EPSG:4326")
+     write_dataframe(df, filename, use_arrow=use_arrow)
+     result = read_dataframe(filename)
+     assert_geodataframe_equal(result, df)
+
+
+ @pytest.mark.filterwarnings(
+     "ignore: You will likely lose important projection information"
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
+     df = read_dataframe(naturalearth_lowres_all_ext)
+     # project Belgium to a custom Albers Equal Area projection
+     expected = (
+         df.loc[df.name == "Belgium"]
+         .reset_index(drop=True)
+         .to_crs("+proj=aea +lat_1=49.5 +lat_2=51.5 +lon_0=4.3")
+     )
+     filename = tmp_path / "test.shp"
+     write_dataframe(expected, filename, use_arrow=use_arrow)
+
+     assert filename.exists()
+
+     df = read_dataframe(filename)
+
+     crs = df.crs.to_dict()
+     assert crs["lat_1"] == 49.5
+     assert crs["lat_2"] == 51.5
+     assert crs["lon_0"] == 4.3
+     assert df.crs.equals(expected.crs)
+
+
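A sketch of the custom-CRS round trip checked above, assuming the same illustrative PROJ string:

    import geopandas as gp
    import shapely
    from pyogrio import read_dataframe, write_dataframe

    aea = "+proj=aea +lat_1=49.5 +lat_2=51.5 +lon_0=4.3"
    gdf = gp.GeoDataFrame(geometry=[shapely.Point(4.3, 50.5)], crs=aea)
    write_dataframe(gdf, "custom_crs.shp")
    # the custom projection survives the shapefile round trip
    assert read_dataframe("custom_crs.shp").crs.equals(gdf.crs)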
1468
+ def test_write_read_mixed_column_values(tmp_path):
+     # use_arrow=True is tested separately below
+     mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
+     geoms = [shapely.Point(0, 0) for _ in mixed_values]
+     test_gdf = gp.GeoDataFrame(
+         {"geometry": geoms, "mixed": mixed_values}, crs="epsg:31370"
+     )
+     output_path = tmp_path / "test_write_mixed_column.gpkg"
+     write_dataframe(test_gdf, output_path)
+     output_gdf = read_dataframe(output_path)
+     assert len(test_gdf) == len(output_gdf)
+     for idx, value in enumerate(mixed_values):
+         if value in (None, np.nan):
+             assert output_gdf["mixed"][idx] is None
+         else:
+             assert output_gdf["mixed"][idx] == str(value)
+
+
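Without Arrow, a mixed-type object column is coerced to strings on write and missing values stay null, as the loop above asserts. A sketch:

    import geopandas as gp
    import shapely
    from pyogrio import read_dataframe, write_dataframe

    gdf = gp.GeoDataFrame(
        {"mixed": ["a", 1, None], "geometry": [shapely.Point(0, 0)] * 3},
        crs="EPSG:4326",
    )
    write_dataframe(gdf, "mixed_col.gpkg")
    # the integer comes back as its string representation
    assert read_dataframe("mixed_col.gpkg")["mixed"].tolist() == ["a", "1", None]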
1486
+ @requires_arrow_write_api
+ def test_write_read_mixed_column_values_arrow(tmp_path):
+     # Arrow cannot represent a column of mixed types
+     mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
+     geoms = [shapely.Point(0, 0) for _ in mixed_values]
+     test_gdf = gp.GeoDataFrame(
+         {"geometry": geoms, "mixed": mixed_values}, crs="epsg:31370"
+     )
+     output_path = tmp_path / "test_write_mixed_column.gpkg"
+     with pytest.raises(TypeError, match=".*Conversion failed for column"):
+         write_dataframe(test_gdf, output_path, use_arrow=True)
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_read_null(tmp_path, use_arrow):
+     output_path = tmp_path / "test_write_nan.gpkg"
+     geom = shapely.Point(0, 0)
+     test_data = {
+         "geometry": [geom, geom, geom],
+         "float64": [1.0, None, np.nan],
+         "object_str": ["test", None, np.nan],
+     }
+     test_gdf = gp.GeoDataFrame(test_data, crs="epsg:31370")
+     write_dataframe(test_gdf, output_path, use_arrow=use_arrow)
+     result_gdf = read_dataframe(output_path)
+     assert len(test_gdf) == len(result_gdf)
+     assert result_gdf["float64"][0] == 1.0
+     assert pd.isna(result_gdf["float64"][1])
+     assert pd.isna(result_gdf["float64"][2])
+     assert result_gdf["object_str"][0] == "test"
+     assert result_gdf["object_str"][1] is None
+     assert result_gdf["object_str"][2] is None
+
+
+ @pytest.mark.parametrize(
+     "wkt,geom_types",
+     [
+         ("Point Z (0 0 0)", ["2.5D Point", "Point Z"]),
+         ("LineString Z (0 0 0, 1 1 0)", ["2.5D LineString", "LineString Z"]),
+         ("Polygon Z ((0 0 0, 0 1 0, 1 1 0, 0 0 0))", ["2.5D Polygon", "Polygon Z"]),
+         ("MultiPoint Z (0 0 0, 1 1 0)", ["2.5D MultiPoint", "MultiPoint Z"]),
+         (
+             "MultiLineString Z ((0 0 0, 1 1 0), (2 2 2, 3 3 2))",
+             ["2.5D MultiLineString", "MultiLineString Z"],
+         ),
+         (
+             "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))",
+             ["2.5D MultiPolygon", "MultiPolygon Z"],
+         ),
+         (
+             "GeometryCollection Z (Point Z (0 0 0))",
+             ["2.5D GeometryCollection", "GeometryCollection Z"],
+         ),
+     ],
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_write_geometry_z_types(tmp_path, wkt, geom_types, use_arrow):
+     filename = tmp_path / "test.fgb"
+     gdf = gp.GeoDataFrame(geometry=from_wkt([wkt]), crs="EPSG:4326")
+     for geom_type in geom_types:
+         write_dataframe(gdf, filename, use_arrow=use_arrow, geometry_type=geom_type)
+         df = read_dataframe(filename)
+         assert_geodataframe_equal(df, gdf)
+
+
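Both spellings of a Z geometry type are accepted for `geometry_type`, per the parametrization above. A sketch:

    import geopandas as gp
    from shapely import from_wkt
    from pyogrio import write_dataframe

    gdf = gp.GeoDataFrame(geometry=from_wkt(["Point Z (0 0 0)"]), crs="EPSG:4326")
    write_dataframe(gdf, "pt1.fgb", geometry_type="Point Z")     # ISO style
    write_dataframe(gdf, "pt2.fgb", geometry_type="2.5D Point")  # legacy GDAL style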
1551
+ @pytest.mark.parametrize("ext", ALL_EXTS)
1552
+ @pytest.mark.parametrize(
1553
+ "test_descr, exp_geometry_type, mixed_dimensions, wkt",
1554
+ [
1555
+ ("1 Point Z", "Point Z", False, ["Point Z (0 0 0)"]),
1556
+ ("1 LineString Z", "LineString Z", False, ["LineString Z (0 0 0, 1 1 0)"]),
1557
+ (
1558
+ "1 Polygon Z",
1559
+ "Polygon Z",
1560
+ False,
1561
+ ["Polygon Z ((0 0 0, 0 1 0, 1 1 0, 0 0 0))"],
1562
+ ),
1563
+ ("1 MultiPoint Z", "MultiPoint Z", False, ["MultiPoint Z (0 0 0, 1 1 0)"]),
1564
+ (
1565
+ "1 MultiLineString Z",
1566
+ "MultiLineString Z",
1567
+ False,
1568
+ ["MultiLineString Z ((0 0 0, 1 1 0), (2 2 2, 3 3 2))"],
1569
+ ),
1570
+ (
1571
+ "1 MultiLinePolygon Z",
1572
+ "MultiPolygon Z",
1573
+ False,
1574
+ [
1575
+ "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))"
1576
+ ],
1577
+ ),
1578
+ (
1579
+ "1 GeometryCollection Z",
1580
+ "GeometryCollection Z",
1581
+ False,
1582
+ ["GeometryCollection Z (Point Z (0 0 0))"],
1583
+ ),
1584
+ ("Point Z + Point", "Point Z", True, ["Point Z (0 0 0)", "Point (0 0)"]),
1585
+ ("Point Z + None", "Point Z", False, ["Point Z (0 0 0)", None]),
1586
+ (
1587
+ "Point Z + LineString Z",
1588
+ "Unknown",
1589
+ False,
1590
+ ["LineString Z (0 0 0, 1 1 0)", "Point Z (0 0 0)"],
1591
+ ),
1592
+ (
1593
+ "Point Z + LineString",
1594
+ "Unknown",
1595
+ True,
1596
+ ["LineString (0 0, 1 1)", "Point Z (0 0 0)"],
1597
+ ),
1598
+ ],
1599
+ )
1600
+ @pytest.mark.requires_arrow_write_api
1601
+ def test_write_geometry_z_types_auto(
1602
+ tmp_path, ext, test_descr, exp_geometry_type, mixed_dimensions, wkt, use_arrow
1603
+ ):
1604
+ # Shapefile has some different behaviour that other file types
1605
+ if ext == ".shp":
1606
+ if exp_geometry_type in ("GeometryCollection Z", "Unknown"):
1607
+ pytest.skip(f"ext {ext} doesn't support {exp_geometry_type}")
1608
+ elif exp_geometry_type == "MultiLineString Z":
1609
+ exp_geometry_type = "LineString Z"
1610
+ elif exp_geometry_type == "MultiPolygon Z":
1611
+ exp_geometry_type = "Polygon Z"
1612
+
1613
+ column_data = {}
1614
+ column_data["test_descr"] = [test_descr] * len(wkt)
1615
+ column_data["idx"] = [str(idx) for idx in range(len(wkt))]
1616
+ gdf = gp.GeoDataFrame(column_data, geometry=from_wkt(wkt), crs="EPSG:4326")
1617
+ filename = tmp_path / f"test{ext}"
1618
+
1619
+ if ext == ".fgb":
1620
+ # writing empty / null geometries not allowed by FlatGeobuf for
1621
+ # GDAL >= 3.6.4 and were simply not written previously
1622
+ gdf = gdf.loc[~(gdf.geometry.isna() | gdf.geometry.is_empty)]
1623
+
1624
+ if mixed_dimensions and DRIVERS[ext] in DRIVERS_NO_MIXED_DIMENSIONS:
1625
+ with pytest.raises(
1626
+ DataSourceError,
1627
+ match=("Mixed 2D and 3D coordinates are not supported by"),
1628
+ ):
1629
+ write_dataframe(gdf, filename, use_arrow=use_arrow)
1630
+ return
1631
+ else:
1632
+ write_dataframe(gdf, filename, use_arrow=use_arrow)
1633
+
1634
+ info = read_info(filename)
1635
+ assert info["geometry_type"] == exp_geometry_type
1636
+
1637
+ result_gdf = read_dataframe(filename)
1638
+ if ext == ".geojsonl":
1639
+ result_gdf.crs = "EPSG:4326"
1640
+
1641
+ assert_geodataframe_equal(gdf, result_gdf)
1642
+
1643
+
1644
+ @pytest.mark.parametrize(
1645
+ "on_invalid, message",
1646
+ [
1647
+ (
1648
+ "warn",
1649
+ "Invalid WKB: geometry is returned as None. IllegalArgumentException: "
1650
+ "Invalid number of points in LinearRing found 2 - must be 0 or >=",
1651
+ ),
1652
+ ("raise", "Invalid number of points in LinearRing found 2 - must be 0 or >="),
1653
+ ("ignore", None),
1654
+ ],
1655
+ )
1656
+ def test_read_invalid_shp(data_dir, use_arrow, on_invalid, message):
1657
+ if on_invalid == "raise":
1658
+ handler = pytest.raises(shapely.errors.GEOSException, match=message)
1659
+ elif on_invalid == "warn":
1660
+ handler = pytest.warns(match=message)
1661
+ elif on_invalid == "ignore":
1662
+ handler = contextlib.nullcontext()
1663
+ else:
1664
+ raise ValueError(f"unknown value for on_invalid: {on_invalid}")
1665
+
1666
+ with handler:
1667
+ df = read_dataframe(
1668
+ data_dir / "poly_not_enough_points.shp.zip",
1669
+ use_arrow=use_arrow,
1670
+ on_invalid=on_invalid,
1671
+ )
1672
+ df.geometry.isnull().all()
1673
+
1674
+
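`on_invalid` is forwarded to shapely's WKB decoding, so unparseable geometries can raise, warn and become None, or be silently ignored. A sketch, assuming a hypothetical shapefile with invalid rings:

    from pyogrio import read_dataframe

    # invalid geometries come back as None without any warning being emitted
    df = read_dataframe("broken_rings.shp", on_invalid="ignore")  # hypothetical path
    n_bad = df.geometry.isnull().sum()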
1675
+ def test_read_multisurface(data_dir, use_arrow):
+     if use_arrow:
+         with pytest.raises(shapely.errors.GEOSException):
+             # TODO(Arrow)
+             # shapely fails parsing the WKB
+             read_dataframe(data_dir / "test_multisurface.gpkg", use_arrow=True)
+     else:
+         df = read_dataframe(data_dir / "test_multisurface.gpkg")
+
+         # MultiSurface should be converted to MultiPolygon
+         assert df.geometry.type.tolist() == ["MultiPolygon"]
+
+
+ def test_read_dataset_kwargs(data_dir, use_arrow):
+     filename = data_dir / "test_nested.geojson"
+
+     # by default, nested data are not flattened
+     df = read_dataframe(filename, use_arrow=use_arrow)
+
+     expected = gp.GeoDataFrame(
+         {
+             "top_level": ["A"],
+             "intermediate_level": ['{ "bottom_level": "B" }'],
+         },
+         geometry=[shapely.Point(0, 0)],
+         crs="EPSG:4326",
+     )
+
+     assert_geodataframe_equal(df, expected)
+
+     df = read_dataframe(filename, use_arrow=use_arrow, FLATTEN_NESTED_ATTRIBUTES="YES")
+
+     expected = gp.GeoDataFrame(
+         {
+             "top_level": ["A"],
+             "intermediate_level_bottom_level": ["B"],
+         },
+         geometry=[shapely.Point(0, 0)],
+         crs="EPSG:4326",
+     )
+
+     assert_geodataframe_equal(df, expected)
+
+
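Unrecognized keyword arguments to `read_dataframe` are forwarded to GDAL as dataset open options, which is how `FLATTEN_NESTED_ATTRIBUTES` reaches the GeoJSON driver above (and why the next test warns on an unknown option). A sketch, assuming a hypothetical nested GeoJSON file:

    from pyogrio import read_dataframe

    # nested properties become top-level columns joined with "_"
    df = read_dataframe("nested.geojson", FLATTEN_NESTED_ATTRIBUTES="YES")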
1719
+ def test_read_invalid_dataset_kwargs(naturalearth_lowres, use_arrow):
+     with pytest.warns(RuntimeWarning, match="does not support open option INVALID"):
+         read_dataframe(naturalearth_lowres, use_arrow=use_arrow, INVALID="YES")
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_nullable_dtypes(tmp_path, use_arrow):
+     path = tmp_path / "test_nullable_dtypes.gpkg"
+     test_data = {
+         "col1": pd.Series([1, 2, 3], dtype="int64"),
+         "col2": pd.Series([1, 2, None], dtype="Int64"),
+         "col3": pd.Series([0.1, None, 0.3], dtype="Float32"),
+         "col4": pd.Series([True, False, None], dtype="boolean"),
+         "col5": pd.Series(["a", None, "b"], dtype="string"),
+     }
+     input_gdf = gp.GeoDataFrame(
+         test_data, geometry=[shapely.Point(0, 0)] * 3, crs="epsg:31370"
+     )
+     write_dataframe(input_gdf, path, use_arrow=use_arrow)
+     output_gdf = read_dataframe(path)
+     # We read it back as default (non-nullable) numpy dtypes, so we cast
+     # to those for the expected result
+     expected = input_gdf.copy()
+     expected["col2"] = expected["col2"].astype("float64")
+     expected["col3"] = expected["col3"].astype("float32")
+     expected["col4"] = expected["col4"].astype("float64")
+     expected["col5"] = expected["col5"].astype(object)
+     expected.loc[1, "col5"] = None  # pandas converts to pd.NA on line above
+     assert_geodataframe_equal(output_gdf, expected)
+
+
+ @pytest.mark.parametrize(
+     "metadata_type", ["dataset_metadata", "layer_metadata", "metadata"]
+ )
+ @pytest.mark.requires_arrow_write_api
+ def test_metadata_io(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
+     metadata = {"level": metadata_type}
+
+     df = read_dataframe(naturalearth_lowres)
+
+     filename = tmp_path / "test.gpkg"
+     write_dataframe(df, filename, use_arrow=use_arrow, **{metadata_type: metadata})
+
+     metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type
+
+     assert read_info(filename)[metadata_key] == metadata
+
+
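As the test above shows, `metadata` is treated as an alias for `layer_metadata`, and values must be strings (the next test rejects anything else). A sketch, assuming a GeoDataFrame `gdf`:

    from pyogrio import read_info, write_dataframe

    write_dataframe(gdf, "meta.gpkg", layer_metadata={"source": "example"})
    assert read_info("meta.gpkg")["layer_metadata"] == {"source": "example"}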
1767
+ @pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
1768
+ @pytest.mark.parametrize(
1769
+ "metadata",
1770
+ [
1771
+ {1: 2},
1772
+ {"key": None},
1773
+ {"key": 1},
1774
+ ],
1775
+ )
1776
+ @pytest.mark.requires_arrow_write_api
1777
+ def test_invalid_metadata(
1778
+ tmp_path, naturalearth_lowres, metadata_type, metadata, use_arrow
1779
+ ):
1780
+ df = read_dataframe(naturalearth_lowres)
1781
+ with pytest.raises(ValueError, match="must be a string"):
1782
+ write_dataframe(
1783
+ df, tmp_path / "test.gpkg", use_arrow=use_arrow, **{metadata_type: metadata}
1784
+ )
1785
+
1786
+
1787
+ @pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
1788
+ @pytest.mark.requires_arrow_write_api
1789
+ def test_metadata_unsupported(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
1790
+ """metadata is silently ignored"""
1791
+
1792
+ filename = tmp_path / "test.geojson"
1793
+ write_dataframe(
1794
+ read_dataframe(naturalearth_lowres),
1795
+ filename,
1796
+ use_arrow=use_arrow,
1797
+ **{metadata_type: {"key": "value"}},
1798
+ )
1799
+
1800
+ metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type
1801
+
1802
+ assert read_info(filename)[metadata_key] is None
1803
+
1804
+
1805
+ @pytest.mark.skipif(not PANDAS_GE_15, reason="ArrowDtype requires pandas 1.5+")
1806
+ def test_read_dataframe_arrow_dtypes(tmp_path):
1807
+ # https://github.com/geopandas/pyogrio/issues/319 - ensure arrow binary
1808
+ # column can be converted with from_wkb in case of missing values
1809
+ pytest.importorskip("pyarrow")
1810
+ filename = tmp_path / "test.gpkg"
1811
+ df = gp.GeoDataFrame(
1812
+ {"col": [1.0, 2.0]}, geometry=[Point(1, 1), None], crs="EPSG:4326"
1813
+ )
1814
+ write_dataframe(df, filename)
1815
+
1816
+ result = read_dataframe(
1817
+ filename,
1818
+ use_arrow=True,
1819
+ arrow_to_pandas_kwargs={
1820
+ "types_mapper": lambda pa_dtype: pd.ArrowDtype(pa_dtype)
1821
+ },
1822
+ )
1823
+ assert isinstance(result["col"].dtype, pd.ArrowDtype)
1824
+ result["col"] = result["col"].astype("float64")
1825
+ assert_geodataframe_equal(result, df)
1826
+
1827
+
1828
+ @requires_pyarrow_api
+ @pytest.mark.skipif(
+     __gdal_version__ < (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
+ )
+ @pytest.mark.parametrize("ext", ALL_EXTS)
+ def test_arrow_bool_roundtrip(tmp_path, ext):
+     filename = tmp_path / f"test{ext}"
+
+     kwargs = {}
+
+     if ext == ".fgb":
+         # For .fgb, use spatial_index=False to avoid the rows being reordered
+         kwargs["spatial_index"] = False
+
+     df = gp.GeoDataFrame(
+         {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
+         crs="EPSG:4326",
+     )
+
+     write_dataframe(df, filename, **kwargs)
+     result = read_dataframe(filename, use_arrow=True)
+     # Shapefiles do not support bool columns; these are returned as int32
+     assert_geodataframe_equal(result, df, check_dtype=ext != ".shp")
+
+
+ @requires_pyarrow_api
+ @pytest.mark.skipif(
+     __gdal_version__ >= (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
+ )
+ @pytest.mark.parametrize("ext", ALL_EXTS)
+ def test_arrow_bool_exception(tmp_path, ext):
+     filename = tmp_path / f"test{ext}"
+
+     df = gp.GeoDataFrame(
+         {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
+         crs="EPSG:4326",
+     )
+
+     write_dataframe(df, filename)
+
+     if ext in {".fgb", ".gpkg"}:
+         # only raise exception for GPKG / FGB
+         with pytest.raises(
+             RuntimeError,
+             match="GDAL < 3.8.3 does not correctly read boolean data values using "
+             "the Arrow API",
+         ):
+             read_dataframe(filename, use_arrow=True)
+
+         # do not raise exception if no bool columns are read
+         read_dataframe(filename, use_arrow=True, columns=[])
+
+     else:
+         _ = read_dataframe(filename, use_arrow=True)
+
+
+ @pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
+ @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
+ def test_write_memory(naturalearth_lowres, driver):
+     df = read_dataframe(naturalearth_lowres)
+
+     buffer = BytesIO()
+     write_dataframe(df, buffer, driver=driver, layer="test")
+
+     assert len(buffer.getbuffer()) > 0
+
+     actual = read_dataframe(buffer)
+     assert len(actual) == len(df)
+
+     is_json = driver == "GeoJSON"
+
+     assert_geodataframe_equal(
+         actual,
+         df,
+         check_less_precise=is_json,
+         check_index_type=False,
+         check_dtype=not is_json,
+     )
+
+
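In-memory writes go through GDAL's /vsimem/ virtual filesystem; because a BytesIO carries no filename extension, the `driver` must be given explicitly, as the next test enforces. A sketch, assuming a GeoDataFrame `gdf`:

    from io import BytesIO

    from pyogrio import read_dataframe, write_dataframe

    buffer = BytesIO()
    write_dataframe(gdf, buffer, driver="GPKG", layer="test")
    roundtripped = read_dataframe(buffer)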
1908
+ def test_write_memory_driver_required(naturalearth_lowres):
+     df = read_dataframe(naturalearth_lowres)
+
+     buffer = BytesIO()
+
+     with pytest.raises(
+         ValueError,
+         match="driver must be provided to write to in-memory file",
+     ):
+         write_dataframe(df.head(1), buffer, driver=None, layer="test")
+
+
+ @pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
+ def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
+     if driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
+         pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")
+
+     df = read_dataframe(naturalearth_lowres)
+
+     buffer = BytesIO()
+
+     with pytest.raises(
+         ValueError, match=f"writing to in-memory file is not supported for {driver}"
+     ):
+         write_dataframe(df, buffer, driver=driver, layer="test")
+
+
+ @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
+ def test_write_memory_append_unsupported(naturalearth_lowres, driver):
+     df = read_dataframe(naturalearth_lowres)
+
+     buffer = BytesIO()
+
+     with pytest.raises(
+         NotImplementedError, match="append is not supported for in-memory files"
+     ):
+         write_dataframe(df.head(1), buffer, driver=driver, layer="test", append=True)
+
+
+ def test_write_memory_existing_unsupported(naturalearth_lowres):
+     df = read_dataframe(naturalearth_lowres)
+
+     buffer = BytesIO(b"0000")
+     with pytest.raises(
+         NotImplementedError,
+         match="writing to existing in-memory object is not supported",
+     ):
+         write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test")
+
+
1958
+ @pytest.mark.parametrize("ext", ["gpkg", "geojson"])
1959
+ def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
1960
+ """Verify that we write non-UTF data to the data source
1961
+
1962
+ IMPORTANT: this may not be valid for the data source and will likely render
1963
+ them unusable in other tools, but should successfully roundtrip unless we
1964
+ disable writing using other encodings.
1965
+
1966
+ NOTE: FlatGeobuff driver cannot handle non-UTF data in GDAL >= 3.9
1967
+
1968
+ NOTE: pyarrow cannot handle non-UTF-8 characters in this way
1969
+ """
1970
+
1971
+ encoding, text = encoded_text
1972
+ output_path = tmp_path / f"test.{ext}"
1973
+
1974
+ df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
1975
+ write_dataframe(df, output_path, encoding=encoding)
1976
+
1977
+ # cannot open these files without specifying encoding
1978
+ with pytest.raises(UnicodeDecodeError):
1979
+ read_dataframe(output_path)
1980
+
1981
+ # must provide encoding to read these properly
1982
+ actual = read_dataframe(output_path, encoding=encoding)
1983
+ assert actual.columns[0] == text
1984
+ assert actual[text].values[0] == text
1985
+
1986
+
1987
+ @requires_pyarrow_api
1988
+ @pytest.mark.parametrize("ext", ["gpkg", "geojson"])
1989
+ def test_non_utf8_encoding_io_arrow_exception(tmp_path, ext, encoded_text):
1990
+ encoding, text = encoded_text
1991
+ output_path = tmp_path / f"test.{ext}"
1992
+
1993
+ df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
1994
+ write_dataframe(df, output_path, encoding=encoding)
1995
+
1996
+ # cannot open these files without specifying encoding
1997
+ with pytest.raises(UnicodeDecodeError):
1998
+ read_dataframe(output_path)
1999
+
2000
+ with pytest.raises(
2001
+ ValueError, match="non-UTF-8 encoding is not supported for Arrow"
2002
+ ):
2003
+ read_dataframe(output_path, encoding=encoding, use_arrow=True)
2004
+
2005
+
2006
+ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):
2007
+ encoding, text = encoded_text
2008
+
2009
+ output_path = tmp_path / "test.shp"
2010
+
2011
+ df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
2012
+ write_dataframe(df, output_path, encoding=encoding)
2013
+
2014
+ # NOTE: GDAL automatically creates a cpg file with the encoding name, which
2015
+ # means that if we read this without specifying the encoding it uses the
2016
+ # correct one
2017
+ actual = read_dataframe(output_path, use_arrow=use_arrow)
2018
+ assert actual.columns[0] == text
2019
+ assert actual[text].values[0] == text
2020
+
2021
+ # verify that if cpg file is not present, that user-provided encoding must be used
2022
+ output_path.with_suffix(".cpg").unlink()
2023
+
2024
+ # We will assume ISO-8859-1, which is wrong
2025
+ miscoded = text.encode(encoding).decode("ISO-8859-1")
2026
+
2027
+ if use_arrow:
2028
+ # pyarrow cannot decode column name with incorrect encoding
2029
+ with pytest.raises(UnicodeDecodeError):
2030
+ read_dataframe(output_path, use_arrow=True)
2031
+ else:
2032
+ bad = read_dataframe(output_path, use_arrow=False)
2033
+ assert bad.columns[0] == miscoded
2034
+ assert bad[miscoded].values[0] == miscoded
2035
+
2036
+ # If encoding is provided, that should yield correct text
2037
+ actual = read_dataframe(output_path, encoding=encoding, use_arrow=use_arrow)
2038
+ assert actual.columns[0] == text
2039
+ assert actual[text].values[0] == text
2040
+
2041
+ # if ENCODING open option, that should yield correct text
2042
+ actual = read_dataframe(output_path, use_arrow=use_arrow, ENCODING=encoding)
2043
+ assert actual.columns[0] == text
2044
+ assert actual[text].values[0] == text
2045
+
2046
+
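Because GDAL writes a .cpg sidecar naming the shapefile's codepage, reads normally decode correctly on their own; when the sidecar is missing, the encoding has to be supplied, as the test above walks through. A sketch with an assumed CP936-encoded shapefile (providing both spellings at once is rejected, as the tests below check):

    from pyogrio import read_dataframe

    df = read_dataframe("cp936.shp", encoding="CP936")  # hypothetical path
    df = read_dataframe("cp936.shp", ENCODING="CP936")  # same, as a GDAL open option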
2047
+ def test_encoding_read_option_collision_shapefile(naturalearth_lowres, use_arrow):
+     """Providing both the encoding parameter and the ENCODING open option
+     (even if blank) is not allowed."""
+
+     with pytest.raises(
+         ValueError, match='cannot provide both encoding parameter and "ENCODING" option'
+     ):
+         read_dataframe(
+             naturalearth_lowres, encoding="CP936", ENCODING="", use_arrow=use_arrow
+         )
+
+
+ def test_encoding_write_layer_option_collision_shapefile(tmp_path, encoded_text):
+     """Providing both the encoding parameter and the ENCODING layer creation
+     option (even if blank) is not allowed."""
+     encoding, text = encoded_text
+
+     output_path = tmp_path / "test.shp"
+     df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
+
+     with pytest.raises(
+         ValueError,
+         match='cannot provide both encoding parameter and "ENCODING" layer creation option',
+     ):
+         write_dataframe(
+             df, output_path, encoding=encoding, layer_options={"ENCODING": ""}
+         )
+
+
+ def test_non_utf8_encoding_shapefile_sql(tmp_path, use_arrow):
+     encoding = "CP936"
+
+     output_path = tmp_path / "test.shp"
+
+     mandarin = "中文"
+     df = gp.GeoDataFrame(
+         {mandarin: mandarin, "geometry": [Point(0, 0)]}, crs="EPSG:4326"
+     )
+     write_dataframe(df, output_path, encoding=encoding)
+
+     actual = read_dataframe(
+         output_path,
+         sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
+         use_arrow=use_arrow,
+     )
+     assert actual.columns[0] == mandarin
+     assert actual[mandarin].values[0] == mandarin
+
+     actual = read_dataframe(
+         output_path,
+         sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
+         encoding=encoding,
+         use_arrow=use_arrow,
+     )
+     assert actual.columns[0] == mandarin
+     assert actual[mandarin].values[0] == mandarin
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
+     # confirm KML coordinates are written in lon, lat order even if the CRS
+     # axis order specifies otherwise
+     points = [Point(10, 20), Point(30, 40), Point(50, 60)]
+     gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
+     output_path = tmp_path / "test.kml"
+     write_dataframe(
+         gdf, output_path, layer="tmp_layer", driver="KML", use_arrow=use_arrow
+     )
+
+     gdf_in = read_dataframe(output_path, use_arrow=use_arrow)
+
+     assert np.array_equal(gdf_in.geometry.values, points)
+
+     if "LIBKML" in list_drivers():
+         # test appending to the existing file only if LIBKML is available,
+         # as GDAL appears to fall back on the LIBKML driver when appending.
+         points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
+         gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
+
+         write_dataframe(
+             gdf_append,
+             output_path,
+             layer="tmp_layer",
+             driver="KML",
+             use_arrow=use_arrow,
+             append=True,
+         )
+         # force_2d is used to compare only the xy geometry, as a z-dimension is
+         # undesirably introduced when the kml file is over-written.
+         gdf_in_appended = read_dataframe(
+             output_path, use_arrow=use_arrow, force_2d=True
+         )
+
+         assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)
+
+
+ @pytest.mark.requires_arrow_write_api
+ def test_write_geojson_rfc7946_coordinates(tmp_path, use_arrow):
+     points = [Point(10, 20), Point(30, 40), Point(50, 60)]
+     gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
+     output_path = tmp_path / "test.geojson"
+     write_dataframe(
+         gdf,
+         output_path,
+         layer="tmp_layer",
+         driver="GeoJSON",
+         RFC7946=True,
+         use_arrow=use_arrow,
+     )
+
+     gdf_in = read_dataframe(output_path, use_arrow=use_arrow)
+
+     assert np.array_equal(gdf_in.geometry.values, points)
+
+     # test appending to the existing file
+
+     points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
+     gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
+
+     write_dataframe(
+         gdf_append,
+         output_path,
+         layer="tmp_layer",
+         driver="GeoJSON",
+         RFC7946=True,
+         use_arrow=use_arrow,
+         append=True,
+     )
+
+     gdf_in_appended = read_dataframe(output_path, use_arrow=use_arrow)
+     assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)