pyogrio 0.10.0__cp39-cp39-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyogrio might be problematic. Click here for more details.

Files changed (223)
  1. pyogrio/__init__.py +55 -0
  2. pyogrio/_compat.py +47 -0
  3. pyogrio/_env.py +59 -0
  4. pyogrio/_err.cpython-39-x86_64-linux-gnu.so +0 -0
  5. pyogrio/_geometry.cpython-39-x86_64-linux-gnu.so +0 -0
  6. pyogrio/_io.cpython-39-x86_64-linux-gnu.so +0 -0
  7. pyogrio/_ogr.cpython-39-x86_64-linux-gnu.so +0 -0
  8. pyogrio/_version.py +21 -0
  9. pyogrio/_vsi.cpython-39-x86_64-linux-gnu.so +0 -0
  10. pyogrio/core.py +386 -0
  11. pyogrio/errors.py +25 -0
  12. pyogrio/gdal_data/GDAL-targets-release.cmake +19 -0
  13. pyogrio/gdal_data/GDAL-targets.cmake +105 -0
  14. pyogrio/gdal_data/GDALConfig.cmake +24 -0
  15. pyogrio/gdal_data/GDALConfigVersion.cmake +85 -0
  16. pyogrio/gdal_data/GDALLogoBW.svg +138 -0
  17. pyogrio/gdal_data/GDALLogoColor.svg +126 -0
  18. pyogrio/gdal_data/GDALLogoGS.svg +126 -0
  19. pyogrio/gdal_data/LICENSE.TXT +467 -0
  20. pyogrio/gdal_data/MM_m_idofic.csv +321 -0
  21. pyogrio/gdal_data/copyright +467 -0
  22. pyogrio/gdal_data/cubewerx_extra.wkt +48 -0
  23. pyogrio/gdal_data/default.rsc +0 -0
  24. pyogrio/gdal_data/ecw_cs.wkt +1453 -0
  25. pyogrio/gdal_data/eedaconf.json +23 -0
  26. pyogrio/gdal_data/epsg.wkt +1 -0
  27. pyogrio/gdal_data/esri_StatePlane_extra.wkt +631 -0
  28. pyogrio/gdal_data/gdalicon.png +0 -0
  29. pyogrio/gdal_data/gdalinfo_output.schema.json +346 -0
  30. pyogrio/gdal_data/gdalmdiminfo_output.schema.json +321 -0
  31. pyogrio/gdal_data/gdaltileindex.xsd +269 -0
  32. pyogrio/gdal_data/gdalvrt.xsd +880 -0
  33. pyogrio/gdal_data/gfs.xsd +246 -0
  34. pyogrio/gdal_data/gml_registry.xml +117 -0
  35. pyogrio/gdal_data/gml_registry.xsd +66 -0
  36. pyogrio/gdal_data/grib2_center.csv +251 -0
  37. pyogrio/gdal_data/grib2_process.csv +102 -0
  38. pyogrio/gdal_data/grib2_subcenter.csv +63 -0
  39. pyogrio/gdal_data/grib2_table_4_2_0_0.csv +261 -0
  40. pyogrio/gdal_data/grib2_table_4_2_0_1.csv +261 -0
  41. pyogrio/gdal_data/grib2_table_4_2_0_13.csv +261 -0
  42. pyogrio/gdal_data/grib2_table_4_2_0_14.csv +261 -0
  43. pyogrio/gdal_data/grib2_table_4_2_0_15.csv +261 -0
  44. pyogrio/gdal_data/grib2_table_4_2_0_16.csv +261 -0
  45. pyogrio/gdal_data/grib2_table_4_2_0_17.csv +11 -0
  46. pyogrio/gdal_data/grib2_table_4_2_0_18.csv +261 -0
  47. pyogrio/gdal_data/grib2_table_4_2_0_19.csv +261 -0
  48. pyogrio/gdal_data/grib2_table_4_2_0_190.csv +261 -0
  49. pyogrio/gdal_data/grib2_table_4_2_0_191.csv +261 -0
  50. pyogrio/gdal_data/grib2_table_4_2_0_2.csv +261 -0
  51. pyogrio/gdal_data/grib2_table_4_2_0_20.csv +261 -0
  52. pyogrio/gdal_data/grib2_table_4_2_0_21.csv +261 -0
  53. pyogrio/gdal_data/grib2_table_4_2_0_3.csv +261 -0
  54. pyogrio/gdal_data/grib2_table_4_2_0_4.csv +261 -0
  55. pyogrio/gdal_data/grib2_table_4_2_0_5.csv +261 -0
  56. pyogrio/gdal_data/grib2_table_4_2_0_6.csv +261 -0
  57. pyogrio/gdal_data/grib2_table_4_2_0_7.csv +261 -0
  58. pyogrio/gdal_data/grib2_table_4_2_10_0.csv +261 -0
  59. pyogrio/gdal_data/grib2_table_4_2_10_1.csv +261 -0
  60. pyogrio/gdal_data/grib2_table_4_2_10_191.csv +261 -0
  61. pyogrio/gdal_data/grib2_table_4_2_10_2.csv +261 -0
  62. pyogrio/gdal_data/grib2_table_4_2_10_3.csv +261 -0
  63. pyogrio/gdal_data/grib2_table_4_2_10_4.csv +261 -0
  64. pyogrio/gdal_data/grib2_table_4_2_1_0.csv +261 -0
  65. pyogrio/gdal_data/grib2_table_4_2_1_1.csv +261 -0
  66. pyogrio/gdal_data/grib2_table_4_2_1_2.csv +261 -0
  67. pyogrio/gdal_data/grib2_table_4_2_20_0.csv +261 -0
  68. pyogrio/gdal_data/grib2_table_4_2_20_1.csv +261 -0
  69. pyogrio/gdal_data/grib2_table_4_2_20_2.csv +261 -0
  70. pyogrio/gdal_data/grib2_table_4_2_2_0.csv +261 -0
  71. pyogrio/gdal_data/grib2_table_4_2_2_3.csv +261 -0
  72. pyogrio/gdal_data/grib2_table_4_2_2_4.csv +261 -0
  73. pyogrio/gdal_data/grib2_table_4_2_2_5.csv +261 -0
  74. pyogrio/gdal_data/grib2_table_4_2_2_6.csv +261 -0
  75. pyogrio/gdal_data/grib2_table_4_2_3_0.csv +261 -0
  76. pyogrio/gdal_data/grib2_table_4_2_3_1.csv +261 -0
  77. pyogrio/gdal_data/grib2_table_4_2_3_2.csv +28 -0
  78. pyogrio/gdal_data/grib2_table_4_2_3_3.csv +8 -0
  79. pyogrio/gdal_data/grib2_table_4_2_3_4.csv +14 -0
  80. pyogrio/gdal_data/grib2_table_4_2_3_5.csv +11 -0
  81. pyogrio/gdal_data/grib2_table_4_2_3_6.csv +11 -0
  82. pyogrio/gdal_data/grib2_table_4_2_4_0.csv +261 -0
  83. pyogrio/gdal_data/grib2_table_4_2_4_1.csv +261 -0
  84. pyogrio/gdal_data/grib2_table_4_2_4_10.csv +261 -0
  85. pyogrio/gdal_data/grib2_table_4_2_4_2.csv +261 -0
  86. pyogrio/gdal_data/grib2_table_4_2_4_3.csv +261 -0
  87. pyogrio/gdal_data/grib2_table_4_2_4_4.csv +261 -0
  88. pyogrio/gdal_data/grib2_table_4_2_4_5.csv +261 -0
  89. pyogrio/gdal_data/grib2_table_4_2_4_6.csv +261 -0
  90. pyogrio/gdal_data/grib2_table_4_2_4_7.csv +261 -0
  91. pyogrio/gdal_data/grib2_table_4_2_4_8.csv +261 -0
  92. pyogrio/gdal_data/grib2_table_4_2_4_9.csv +261 -0
  93. pyogrio/gdal_data/grib2_table_4_2_local_Canada.csv +5 -0
  94. pyogrio/gdal_data/grib2_table_4_2_local_HPC.csv +2 -0
  95. pyogrio/gdal_data/grib2_table_4_2_local_MRMS.csv +175 -0
  96. pyogrio/gdal_data/grib2_table_4_2_local_NCEP.csv +401 -0
  97. pyogrio/gdal_data/grib2_table_4_2_local_NDFD.csv +38 -0
  98. pyogrio/gdal_data/grib2_table_4_2_local_index.csv +7 -0
  99. pyogrio/gdal_data/grib2_table_4_5.csv +261 -0
  100. pyogrio/gdal_data/grib2_table_versions.csv +3 -0
  101. pyogrio/gdal_data/gt_datum.csv +229 -0
  102. pyogrio/gdal_data/gt_ellips.csv +24 -0
  103. pyogrio/gdal_data/header.dxf +1124 -0
  104. pyogrio/gdal_data/inspire_cp_BasicPropertyUnit.gfs +57 -0
  105. pyogrio/gdal_data/inspire_cp_CadastralBoundary.gfs +60 -0
  106. pyogrio/gdal_data/inspire_cp_CadastralParcel.gfs +81 -0
  107. pyogrio/gdal_data/inspire_cp_CadastralZoning.gfs +161 -0
  108. pyogrio/gdal_data/jpfgdgml_AdmArea.gfs +59 -0
  109. pyogrio/gdal_data/jpfgdgml_AdmBdry.gfs +49 -0
  110. pyogrio/gdal_data/jpfgdgml_AdmPt.gfs +59 -0
  111. pyogrio/gdal_data/jpfgdgml_BldA.gfs +54 -0
  112. pyogrio/gdal_data/jpfgdgml_BldL.gfs +54 -0
  113. pyogrio/gdal_data/jpfgdgml_Cntr.gfs +54 -0
  114. pyogrio/gdal_data/jpfgdgml_CommBdry.gfs +49 -0
  115. pyogrio/gdal_data/jpfgdgml_CommPt.gfs +59 -0
  116. pyogrio/gdal_data/jpfgdgml_Cstline.gfs +54 -0
  117. pyogrio/gdal_data/jpfgdgml_ElevPt.gfs +54 -0
  118. pyogrio/gdal_data/jpfgdgml_GCP.gfs +94 -0
  119. pyogrio/gdal_data/jpfgdgml_LeveeEdge.gfs +49 -0
  120. pyogrio/gdal_data/jpfgdgml_RailCL.gfs +54 -0
  121. pyogrio/gdal_data/jpfgdgml_RdASL.gfs +44 -0
  122. pyogrio/gdal_data/jpfgdgml_RdArea.gfs +54 -0
  123. pyogrio/gdal_data/jpfgdgml_RdCompt.gfs +59 -0
  124. pyogrio/gdal_data/jpfgdgml_RdEdg.gfs +59 -0
  125. pyogrio/gdal_data/jpfgdgml_RdMgtBdry.gfs +49 -0
  126. pyogrio/gdal_data/jpfgdgml_RdSgmtA.gfs +59 -0
  127. pyogrio/gdal_data/jpfgdgml_RvrMgtBdry.gfs +49 -0
  128. pyogrio/gdal_data/jpfgdgml_SBAPt.gfs +49 -0
  129. pyogrio/gdal_data/jpfgdgml_SBArea.gfs +54 -0
  130. pyogrio/gdal_data/jpfgdgml_SBBdry.gfs +44 -0
  131. pyogrio/gdal_data/jpfgdgml_WA.gfs +54 -0
  132. pyogrio/gdal_data/jpfgdgml_WL.gfs +54 -0
  133. pyogrio/gdal_data/jpfgdgml_WStrA.gfs +54 -0
  134. pyogrio/gdal_data/jpfgdgml_WStrL.gfs +54 -0
  135. pyogrio/gdal_data/nitf_spec.xml +3306 -0
  136. pyogrio/gdal_data/nitf_spec.xsd +189 -0
  137. pyogrio/gdal_data/ogrinfo_output.schema.json +528 -0
  138. pyogrio/gdal_data/ogrvrt.xsd +546 -0
  139. pyogrio/gdal_data/osmconf.ini +132 -0
  140. pyogrio/gdal_data/ozi_datum.csv +131 -0
  141. pyogrio/gdal_data/ozi_ellips.csv +35 -0
  142. pyogrio/gdal_data/pci_datum.txt +530 -0
  143. pyogrio/gdal_data/pci_ellips.txt +129 -0
  144. pyogrio/gdal_data/pdfcomposition.xsd +721 -0
  145. pyogrio/gdal_data/pds4_template.xml +65 -0
  146. pyogrio/gdal_data/plscenesconf.json +1985 -0
  147. pyogrio/gdal_data/ruian_vf_ob_v1.gfs +1455 -0
  148. pyogrio/gdal_data/ruian_vf_st_uvoh_v1.gfs +86 -0
  149. pyogrio/gdal_data/ruian_vf_st_v1.gfs +1489 -0
  150. pyogrio/gdal_data/ruian_vf_v1.gfs +2126 -0
  151. pyogrio/gdal_data/s57agencies.csv +249 -0
  152. pyogrio/gdal_data/s57attributes.csv +484 -0
  153. pyogrio/gdal_data/s57expectedinput.csv +1008 -0
  154. pyogrio/gdal_data/s57objectclasses.csv +287 -0
  155. pyogrio/gdal_data/seed_2d.dgn +0 -0
  156. pyogrio/gdal_data/seed_3d.dgn +0 -0
  157. pyogrio/gdal_data/stateplane.csv +259 -0
  158. pyogrio/gdal_data/tms_LINZAntarticaMapTileGrid.json +190 -0
  159. pyogrio/gdal_data/tms_MapML_APSTILE.json +268 -0
  160. pyogrio/gdal_data/tms_MapML_CBMTILE.json +346 -0
  161. pyogrio/gdal_data/tms_NZTM2000.json +243 -0
  162. pyogrio/gdal_data/trailer.dxf +434 -0
  163. pyogrio/gdal_data/usage +4 -0
  164. pyogrio/gdal_data/vcpkg-cmake-wrapper.cmake +23 -0
  165. pyogrio/gdal_data/vcpkg.spdx.json +264 -0
  166. pyogrio/gdal_data/vcpkg_abi_info.txt +41 -0
  167. pyogrio/gdal_data/vdv452.xml +367 -0
  168. pyogrio/gdal_data/vdv452.xsd +63 -0
  169. pyogrio/gdal_data/vicar.json +164 -0
  170. pyogrio/geopandas.py +683 -0
  171. pyogrio/proj_data/CH +22 -0
  172. pyogrio/proj_data/GL27 +23 -0
  173. pyogrio/proj_data/ITRF2000 +24 -0
  174. pyogrio/proj_data/ITRF2008 +94 -0
  175. pyogrio/proj_data/ITRF2014 +55 -0
  176. pyogrio/proj_data/copyright +34 -0
  177. pyogrio/proj_data/deformation_model.schema.json +582 -0
  178. pyogrio/proj_data/nad.lst +142 -0
  179. pyogrio/proj_data/nad27 +810 -0
  180. pyogrio/proj_data/nad83 +745 -0
  181. pyogrio/proj_data/other.extra +53 -0
  182. pyogrio/proj_data/proj-config-version.cmake +44 -0
  183. pyogrio/proj_data/proj-config.cmake +79 -0
  184. pyogrio/proj_data/proj-targets-release.cmake +19 -0
  185. pyogrio/proj_data/proj-targets.cmake +107 -0
  186. pyogrio/proj_data/proj.db +0 -0
  187. pyogrio/proj_data/proj.ini +51 -0
  188. pyogrio/proj_data/proj4-targets-release.cmake +19 -0
  189. pyogrio/proj_data/proj4-targets.cmake +107 -0
  190. pyogrio/proj_data/projjson.schema.json +1174 -0
  191. pyogrio/proj_data/triangulation.schema.json +214 -0
  192. pyogrio/proj_data/usage +4 -0
  193. pyogrio/proj_data/vcpkg.spdx.json +198 -0
  194. pyogrio/proj_data/vcpkg_abi_info.txt +27 -0
  195. pyogrio/proj_data/world +214 -0
  196. pyogrio/raw.py +887 -0
  197. pyogrio/tests/__init__.py +0 -0
  198. pyogrio/tests/conftest.py +398 -0
  199. pyogrio/tests/fixtures/README.md +108 -0
  200. pyogrio/tests/fixtures/curve.gpkg +0 -0
  201. pyogrio/tests/fixtures/curvepolygon.gpkg +0 -0
  202. pyogrio/tests/fixtures/line_zm.gpkg +0 -0
  203. pyogrio/tests/fixtures/multisurface.gpkg +0 -0
  204. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.cpg +1 -0
  205. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf +0 -0
  206. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.prj +1 -0
  207. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp +0 -0
  208. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx +0 -0
  209. pyogrio/tests/fixtures/sample.osm.pbf +0 -0
  210. pyogrio/tests/fixtures/test_gpkg_nulls.gpkg +0 -0
  211. pyogrio/tests/test_arrow.py +1195 -0
  212. pyogrio/tests/test_core.py +678 -0
  213. pyogrio/tests/test_geopandas_io.py +2314 -0
  214. pyogrio/tests/test_path.py +364 -0
  215. pyogrio/tests/test_raw_io.py +1515 -0
  216. pyogrio/tests/test_util.py +56 -0
  217. pyogrio/util.py +247 -0
  218. pyogrio-0.10.0.dist-info/LICENSE +21 -0
  219. pyogrio-0.10.0.dist-info/METADATA +129 -0
  220. pyogrio-0.10.0.dist-info/RECORD +223 -0
  221. pyogrio-0.10.0.dist-info/WHEEL +5 -0
  222. pyogrio-0.10.0.dist-info/top_level.txt +1 -0
  223. pyogrio.libs/libgdal-44263852.so.35.3.9.1 +0 -0
@@ -0,0 +1,1515 @@
1
+ import contextlib
2
+ import ctypes
3
+ import json
4
+ import sys
5
+ from io import BytesIO
6
+ from zipfile import ZipFile
7
+
8
+ import numpy as np
9
+ from numpy import array_equal
10
+
11
+ import pyogrio
12
+ from pyogrio import (
13
+ __gdal_version__,
14
+ get_gdal_config_option,
15
+ list_drivers,
16
+ list_layers,
17
+ read_info,
18
+ set_gdal_config_options,
19
+ )
20
+ from pyogrio._compat import HAS_PYARROW, HAS_SHAPELY
21
+ from pyogrio.errors import DataLayerError, DataSourceError, FeatureError
22
+ from pyogrio.raw import open_arrow, read, write
23
+ from pyogrio.tests.conftest import (
24
+ DRIVER_EXT,
25
+ DRIVERS,
26
+ prepare_testfile,
27
+ requires_arrow_api,
28
+ requires_pyarrow_api,
29
+ requires_shapely,
30
+ )
31
+
32
+ import pytest
33
+
34
+ try:
35
+ import shapely
36
+ except ImportError:
37
+ pass
38
+
39
+
40
def test_read(naturalearth_lowres):
    """Read the naturalearth_lowres shapefile fixture and check metadata/data.

    Verifies CRS, reported geometry type, encoding, the five expected field
    names, array-length consistency, and the WKB header of the first geometry.
    """
    meta, _, geometry, fields = read(naturalearth_lowres)

    assert meta["crs"] == "EPSG:4326"
    # shapefiles do not distinguish Polygon from MultiPolygon, so the layer
    # reports Polygon even though features are MultiPolygon (see WKB check below)
    assert meta["geometry_type"] == "Polygon"
    assert meta["encoding"] == "UTF-8"
    assert meta["fields"].shape == (5,)

    assert meta["fields"].tolist() == [
        "pop_est",
        "continent",
        "name",
        "iso_a3",
        "gdp_md_est",
    ]

    assert len(fields) == 5
    assert len(geometry) == len(fields[0])

    # WKB prefix: little-endian (0x01), geometry type 6 (MultiPolygon),
    # first part-count byte 0x03
    assert geometry[0][:6] == b"\x01\x06\x00\x00\x00\x03"
61
+
62
+
63
@pytest.mark.parametrize("ext", DRIVERS)
def test_read_autodetect_driver(tmp_path, naturalearth_lowres, ext):
    """Files can be read back for every supported autodetected extension.

    The fixture is converted to each extension in DRIVERS; the driver is then
    autodetected from the extension when reading.
    """
    # Test all supported autodetect drivers
    testfile = prepare_testfile(naturalearth_lowres, dst_dir=tmp_path, ext=ext)

    assert testfile.suffix == ext
    assert testfile.exists()
    meta, _, geometry, fields = read(testfile)

    assert meta["crs"] == "EPSG:4326"
    # formats differ in how precisely they report the layer geometry type
    assert meta["geometry_type"] in ("MultiPolygon", "Polygon", "Unknown")
    assert meta["encoding"] == "UTF-8"
    assert meta["fields"].shape == (5,)

    assert meta["fields"].tolist() == [
        "pop_est",
        "continent",
        "name",
        "iso_a3",
        "gdp_md_est",
    ]

    assert len(fields) == 5
    assert len(geometry) == len(fields[0])
87
+
88
+
89
def test_read_arrow_unspecified_layer_warning(data_dir):
    """Reading a multi-layer file without specifying a layer gives a warning."""
    multilayer_path = data_dir / "sample.osm.pbf"
    with pytest.warns(UserWarning, match="More than one layer found "):
        read(multilayer_path)
93
+
94
+
95
def test_read_invalid_layer(naturalearth_lowres):
    """A nonexistent layer name or an out-of-range index raises DataLayerError."""
    for bad_layer in ("invalid", -1, 2):
        with pytest.raises(
            DataLayerError, match=f"Layer '{bad_layer}' could not be opened"
        ):
            read(naturalearth_lowres, layer=bad_layer)
104
+
105
+
106
def test_vsi_read_layers(naturalearth_lowres_vsi):
    """Layers can be listed and read through a GDAL /vsi/ virtual path."""
    # fixture yields a pair; only the second (vsi path) element is used here
    _, naturalearth_lowres_vsi = naturalearth_lowres_vsi
    assert array_equal(
        list_layers(naturalearth_lowres_vsi), [["naturalearth_lowres", "Polygon"]]
    )

    geometry = read(naturalearth_lowres_vsi)[2]
    # the fixture layer contains 177 features
    assert geometry.shape == (177,)
114
+
115
+
116
def test_read_no_geometry(naturalearth_lowres):
    """With read_geometry=False the geometry element of the result is None."""
    result = read(naturalearth_lowres, read_geometry=False)
    assert result[2] is None
120
+
121
+
122
@requires_shapely
def test_read_no_geometry__mask(naturalearth_lowres):
    """A spatial mask still filters rows when geometry is not returned."""
    geometry, fields = read(
        naturalearth_lowres,
        read_geometry=False,
        mask=shapely.Point(-105, 55),  # point inside Canada
    )[2:]

    # fields[3] is the iso_a3 column of the fixture
    assert np.array_equal(fields[3], ["CAN"])
    assert geometry is None
132
+
133
+
134
def test_read_no_geometry__bbox(naturalearth_lowres):
    """A bbox filter still filters rows when geometry is not returned."""
    geometry, fields = read(
        naturalearth_lowres,
        read_geometry=False,
        # degenerate (zero-area) bbox at a point inside Canada
        bbox=(-109.0, 55.0, -109.0, 55.0),
    )[2:]

    # fields[3] is the iso_a3 column of the fixture
    assert np.array_equal(fields[3], ["CAN"])
    assert geometry is None
143
+
144
+
145
def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres):
    """Requesting no geometry, no columns and no fids raises ValueError."""
    with pytest.raises(
        ValueError,
        match=(
            "at least one of read_geometry or return_fids must be True or columns must "
            "be None or non-empty"
        ),
    ):
        _ = read(
            naturalearth_lowres, columns=[], read_geometry=False, return_fids=False
        )
156
+
157
+
158
def test_read_columns(naturalearth_lowres):
    """Read with an explicit column list; repeated names should be dropped.

    NOTE(review): the return values of ``array_equal(...)`` below are
    discarded, so these comparisons can never fail the test — the checks are
    vacuous. They presumably need a leading ``assert``, but the expected
    values should first be confirmed against the fixture's actual field names
    (which are lowercase in the fixture read above) — TODO confirm.
    """
    columns = ["NAME", "NAME_LONG"]
    meta, _, geometry, fields = read(
        naturalearth_lowres, columns=columns, read_geometry=False
    )
    array_equal(meta["fields"], columns)

    # Repeats should be dropped
    columns = ["NAME", "NAME_LONG", "NAME"]
    meta, _, geometry, fields = read(
        naturalearth_lowres, columns=columns, read_geometry=False
    )
    array_equal(meta["fields"], columns[:2])
171
+
172
+
173
@pytest.mark.parametrize("skip_features", [10, 200])
def test_read_skip_features(naturalearth_lowres_all_ext, skip_features):
    """skip_features drops leading features; overshooting yields empty arrays.

    Parametrized with 10 (within range) and 200 (more than the 177 features
    in the layer).
    """
    expected_geometry, expected_fields = read(naturalearth_lowres_all_ext)[2:]
    geometry, fields = read(naturalearth_lowres_all_ext, skip_features=skip_features)[
        2:
    ]

    # skipping more features than available in layer returns empty arrays
    expected_count = max(len(expected_geometry) - skip_features, 0)

    assert len(geometry) == expected_count
    assert len(fields[0]) == expected_count

    assert np.array_equal(geometry, expected_geometry[skip_features:])
    # Last field has more variable data
    assert np.array_equal(fields[-1], expected_fields[-1][skip_features:])
189
+
190
+
191
def test_read_negative_skip_features(naturalearth_lowres):
    """A negative skip_features value is rejected with ValueError."""
    expected_msg = "'skip_features' must be >= 0"
    with pytest.raises(ValueError, match=expected_msg):
        read(naturalearth_lowres, skip_features=-1)
194
+
195
+
196
def test_read_max_features(naturalearth_lowres):
    """max_features caps the result to the first N features of the layer."""
    full_geometry, full_fields = read(naturalearth_lowres)[2:]
    limited_geometry, limited_fields = read(naturalearth_lowres, max_features=2)[2:]

    assert len(limited_geometry) == 2
    assert len(limited_fields[0]) == 2

    # limited results must be a prefix of the full read
    assert np.array_equal(limited_geometry, full_geometry[:2])
    assert np.array_equal(limited_fields[-1], full_fields[-1][:2])
205
+
206
+
207
def test_read_negative_max_features(naturalearth_lowres):
    """A negative max_features value is rejected with ValueError."""
    expected_msg = "'max_features' must be >= 0"
    with pytest.raises(ValueError, match=expected_msg):
        read(naturalearth_lowres, max_features=-1)
210
+
211
+
212
def test_read_where(naturalearth_lowres):
    """An attribute (``where``) filter restricts the returned features."""
    # empty filter should return full set of records
    geometry, fields = read(naturalearth_lowres, where="")[2:]
    assert len(geometry) == 177
    assert len(fields) == 5
    assert len(fields[0]) == 177

    # should return singular item
    geometry, fields = read(naturalearth_lowres, where="iso_a3 = 'CAN'")[2:]
    assert len(geometry) == 1
    assert len(fields) == 5
    assert len(fields[0]) == 1
    # fields[3] is a 1-element array here, so comparing to a scalar is truthy
    # only if the single element matches
    assert fields[3] == "CAN"

    # should return items within range
    geometry, fields = read(
        naturalearth_lowres, where="POP_EST >= 10000000 AND POP_EST < 100000000"
    )[2:]
    assert len(geometry) == 75
    assert min(fields[0]) >= 10000000
    assert max(fields[0]) < 100000000

    # should match no items
    geometry, fields = read(naturalearth_lowres, where="iso_a3 = 'INVALID'")[2:]
    assert len(geometry) == 0
237
+
238
+
239
def test_read_where_invalid(naturalearth_lowres):
    """An unparseable where clause raises ValueError."""
    with pytest.raises(ValueError, match="Invalid SQL"):
        read(naturalearth_lowres, where="invalid")
242
+
243
+
244
@pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
def test_read_bbox_invalid(naturalearth_lowres, bbox):
    """A bbox with fewer than 4 values raises ValueError."""
    with pytest.raises(ValueError, match="Invalid bbox"):
        read(naturalearth_lowres, bbox=bbox)
248
+
249
+
250
def test_read_bbox(naturalearth_lowres_all_ext):
    """A bbox filter returns only features intersecting the box, for all formats."""
    # tiny box in the Gulf of Guinea: should return no features
    geometry, fields = read(naturalearth_lowres_all_ext, bbox=(0, 0, 0.00001, 0.00001))[
        2:
    ]

    assert len(geometry) == 0

    # box over Central America intersects exactly Panama and Costa Rica
    geometry, fields = read(naturalearth_lowres_all_ext, bbox=(-85, 8, -80, 10))[2:]

    assert len(geometry) == 2
    assert np.array_equal(fields[3], ["PAN", "CRI"])
262
+
263
+
264
def test_read_bbox_sql(naturalearth_lowres_all_ext):
    """bbox and a full SQL statement can be combined."""
    fields = read(
        naturalearth_lowres_all_ext,
        # northern North America; SQL excludes USA and Russia, leaving Canada
        bbox=(-180, 50, -100, 90),
        sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
    )[3]
    assert len(fields[3]) == 1
    assert np.array_equal(fields[3], ["CAN"])
272
+
273
+
274
def test_read_bbox_where(naturalearth_lowres_all_ext):
    """bbox and an attribute (where) filter can be combined."""
    fields = read(
        naturalearth_lowres_all_ext,
        # northern North America; where excludes USA and Russia, leaving Canada
        bbox=(-180, 50, -100, 90),
        where="iso_a3 not in ('USA', 'RUS')",
    )[3]
    assert len(fields[3]) == 1
    assert np.array_equal(fields[3], ["CAN"])
282
+
283
+
284
@requires_shapely
@pytest.mark.parametrize(
    "mask",
    [
        # GeoJSON-like dict, GeoJSON string and arbitrary text are all rejected:
        # only Shapely geometry objects are accepted
        {"type": "Point", "coordinates": [0, 0]},
        '{"type": "Point", "coordinates": [0, 0]}',
        "invalid",
    ],
)
def test_read_mask_invalid(naturalearth_lowres, mask):
    """Non-Shapely mask values raise ValueError."""
    with pytest.raises(ValueError, match="'mask' parameter must be a Shapely geometry"):
        read(naturalearth_lowres, mask=mask)
296
+
297
+
298
@requires_shapely
def test_read_bbox_mask_invalid(naturalearth_lowres):
    """Passing both bbox and mask at the same time raises ValueError."""
    point = shapely.Point(-105, 55)
    with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
        read(naturalearth_lowres, bbox=(-85, 8, -80, 10), mask=point)
302
+
303
+
304
@requires_shapely
@pytest.mark.parametrize(
    "mask,expected",
    [
        # WKT mask geometry -> expected iso_a3 codes of intersecting countries
        ("POINT (-105 55)", ["CAN"]),
        ("POLYGON ((-80 8, -80 10, -85 10, -85 8, -80 8))", ["PAN", "CRI"]),
        (
            """POLYGON ((
                6.101929 50.97085,
                5.773002 50.906611,
                5.593156 50.642649,
                6.059271 50.686052,
                6.374064 50.851481,
                6.101929 50.97085
            ))""",
            ["DEU", "BEL", "NLD"],
        ),
        (
            """GEOMETRYCOLLECTION (
                POINT (-7.7 53),
                POLYGON ((-80 8, -80 10, -85 10, -85 8, -80 8))
            )""",
            ["PAN", "CRI", "IRL"],
        ),
    ],
)
def test_read_mask(naturalearth_lowres_all_ext, mask, expected):
    """A Shapely mask geometry selects intersecting features, for all formats."""
    mask = shapely.from_wkt(mask)

    geometry, fields = read(naturalearth_lowres_all_ext, mask=mask)[2:]

    # fields[3] is the iso_a3 column of the fixture
    assert np.array_equal(fields[3], expected)
    assert len(geometry) == len(expected)
337
+
338
+
339
@requires_shapely
def test_read_mask_sql(naturalearth_lowres_all_ext):
    """mask and a full SQL statement can be combined."""
    fields = read(
        naturalearth_lowres_all_ext,
        # box over northern North America; SQL excludes USA and Russia
        mask=shapely.box(-180, 50, -100, 90),
        sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
    )[3]
    assert len(fields[3]) == 1
    assert np.array_equal(fields[3], ["CAN"])
348
+
349
+
350
@requires_shapely
def test_read_mask_where(naturalearth_lowres_all_ext):
    """mask and an attribute (where) filter can be combined."""
    fields = read(
        naturalearth_lowres_all_ext,
        # box over northern North America; where excludes USA and Russia
        mask=shapely.box(-180, 50, -100, 90),
        where="iso_a3 not in ('USA', 'RUS')",
    )[3]
    assert len(fields[3]) == 1
    assert np.array_equal(fields[3], ["CAN"])
359
+
360
+
361
def test_read_fids(naturalearth_lowres):
    """Reading a subset by fids works for both list and numpy-array inputs.

    Bug fix: the ``read()`` call previously passed the constant ``subset``
    instead of the loop variable ``fids``, so the numpy-array variant was
    never exercised; ``assert len(fids) == 3`` was also a vacuous check on
    the loop variable itself and now checks the returned fid array.
    """
    expected_fids, expected_geometry, expected_fields = read(
        naturalearth_lowres, return_fids=True
    )[1:]
    subset = [0, 10, 5]

    # exercise both accepted input types for the fids parameter
    for fids in [subset, np.array(subset)]:
        index, geometry, fields = read(
            naturalearth_lowres, fids=fids, return_fids=True
        )[1:]

        assert len(index) == 3
        assert len(geometry) == 3
        assert len(fields[0]) == 3

        # results must match the corresponding rows of a full read,
        # in the order requested
        assert np.array_equal(index, expected_fids[subset])
        assert np.array_equal(geometry, expected_geometry[subset])
        assert np.array_equal(fields[-1], expected_fields[-1][subset])
379
+
380
+
381
def test_read_fids_out_of_bounds(naturalearth_lowres):
    """Requesting fids outside the available range raises FeatureError.

    Checks both below (-1) and above (200, layer has 177 features) the range.
    """
    with pytest.raises(
        FeatureError,
        match=r"Attempt to read shape with feature id \(-1\) out of available range",
    ):
        read(naturalearth_lowres, fids=[-1])

    with pytest.raises(
        FeatureError,
        match=r"Attempt to read shape with feature id \(200\) out of available range",
    ):
        read(naturalearth_lowres, fids=[200])
393
+
394
+
395
def test_read_fids_unsupported_keywords(naturalearth_lowres):
    """fids is mutually exclusive with the other subsetting keywords.

    NOTE(review): the bbox case appears twice below with different boxes;
    the second occurrence looks redundant.
    """
    with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
        read(naturalearth_lowres, fids=[1], where="iso_a3 = 'CAN'")

    with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
        read(naturalearth_lowres, fids=[1], bbox=(-140, 20, -100, 45))

    with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
        read(naturalearth_lowres, fids=[1], skip_features=5)

    with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
        read(naturalearth_lowres, fids=[1], max_features=5)

    with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
        read(naturalearth_lowres, fids=[1], bbox=(0, 0, 0.0001, 0.0001))

    # mask requires shapely, so only check it when shapely is installed
    if HAS_SHAPELY:
        with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
            read(naturalearth_lowres, fids=[1], mask=shapely.Point(0, 0))
414
+
415
+
416
def test_read_return_fids(naturalearth_lowres):
    """fids are returned only on request, as int64, honoring skip/max filters."""
    # default is to not return fids
    fids = read(naturalearth_lowres)[1]
    assert fids is None

    fids = read(naturalearth_lowres, return_fids=False)[1]
    assert fids is None

    fids = read(naturalearth_lowres, return_fids=True, skip_features=2, max_features=2)[
        1
    ]
    assert fids is not None
    assert fids.dtype == np.int64
    # Note: shapefile FIDS start at 0
    assert np.array_equal(fids, np.array([2, 3], dtype="int64"))
431
+
432
+
433
def test_read_return_only_fids(naturalearth_lowres):
    """Reading only fids (no geometry, no columns) yields fids alone."""
    _, fids, geometry, field_data = read(
        naturalearth_lowres, columns=[], read_geometry=False, return_fids=True
    )
    assert fids is not None
    # all 177 features are represented by their fid
    assert len(fids) == 177
    assert geometry is None
    assert len(field_data) == 0
441
+
442
+
443
@pytest.mark.parametrize("encoding", [None, "ISO-8859-1"])
def test_write_shp(tmp_path, naturalearth_lowres, encoding):
    """Writing a shapefile creates sidecar files and records the encoding.

    Parametrized with no explicit encoding (expected to default to UTF-8)
    and an explicit ISO-8859-1.
    """
    meta, _, geometry, field_data = read(naturalearth_lowres)

    filename = tmp_path / "test.shp"
    meta["encoding"] = encoding
    write(filename, geometry, field_data, **meta)

    assert filename.exists()
    # shapefile sidecars must be written alongside the .shp
    for ext in (".dbf", ".prj"):
        assert filename.with_suffix(ext).exists()

    # We write shapefiles in UTF-8 by default on all platforms
    expected_encoding = encoding if encoding is not None else "UTF-8"
    # the .cpg sidecar records the codepage used
    with open(filename.with_suffix(".cpg")) as cpg_file:
        result_encoding = cpg_file.read()
    assert result_encoding == expected_encoding
460
+
461
+
462
def test_write_gpkg(tmp_path, naturalearth_lowres):
    """Writing with the GPKG driver produces a file on disk."""
    meta, _, geometry, field_data = read(naturalearth_lowres)
    # GPKG distinguishes Polygon from MultiPolygon; the source data is multi
    meta["geometry_type"] = "MultiPolygon"

    out_path = tmp_path / "test.gpkg"
    write(out_path, geometry, field_data, driver="GPKG", **meta)

    assert out_path.exists()
470
+
471
+
472
def test_write_gpkg_multiple_layers(tmp_path, naturalearth_lowres):
    """Multiple named layers can be appended to the same GPKG file."""
    meta, _, geometry, field_data = read(naturalearth_lowres)
    # GPKG distinguishes Polygon from MultiPolygon; the source data is multi
    meta["geometry_type"] = "MultiPolygon"

    filename = tmp_path / "test.gpkg"
    write(filename, geometry, field_data, driver="GPKG", layer="first", **meta)

    assert filename.exists()

    assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])

    # writing a second layer must preserve the first
    write(filename, geometry, field_data, driver="GPKG", layer="second", **meta)

    assert np.array_equal(
        list_layers(filename), [["first", "MultiPolygon"], ["second", "MultiPolygon"]]
    )
488
+
489
+
490
def test_write_geojson(tmp_path, naturalearth_lowres):
    """Round-trip the fixture to GeoJSON and validate the JSON structure.

    Fix: the output was previously read via a bare ``open(filename).read()``,
    leaking the file handle; ``Path.read_text()`` reads and closes it.
    """
    meta, _, geometry, field_data = read(naturalearth_lowres)

    filename = tmp_path / "test.json"
    write(filename, geometry, field_data, driver="GeoJSON", **meta)

    assert filename.exists()

    data = json.loads(filename.read_text())

    assert data["type"] == "FeatureCollection"
    # layer name defaults to the output file stem
    assert data["name"] == "test"
    assert "crs" in data
    assert len(data["features"]) == len(geometry)
    # every source field must appear in the first feature's properties
    assert not len(
        set(meta["fields"]).difference(data["features"][0]["properties"].keys())
    )
507
+
508
+
509
def test_write_no_fields(tmp_path, naturalearth_lowres):
    """Test writing file with no fields/attribute columns."""
    # Prepare test data
    meta, _, geometry, field_data = read(naturalearth_lowres)
    field_data = None
    meta["fields"] = None
    # naturalearth_lowres actually contains MultiPolygons. A shapefile doesn't make the
    # distinction, so the metadata just reports Polygon. GPKG does, so override here to
    # avoid GDAL warnings.
    meta["geometry_type"] = "MultiPolygon"

    # Test
    filename = tmp_path / "test.gpkg"
    write(filename, geometry, field_data, driver="GPKG", **meta)

    # Check result: geometry-only layer with zero fields, all 177 features
    assert filename.exists()
    meta, _, geometry, fields = read(filename)

    assert meta["crs"] == "EPSG:4326"
    assert meta["geometry_type"] == "MultiPolygon"
    assert meta["encoding"] == "UTF-8"
    assert meta["fields"].shape == (0,)
    assert len(fields) == 0
    assert len(geometry) == 177

    # WKB prefix: little-endian, geometry type 6 (MultiPolygon), 3 parts
    assert geometry[0][:6] == b"\x01\x06\x00\x00\x00\x03"
537
+
538
+
539
def test_write_no_geom(tmp_path, naturalearth_lowres):
    """Test writing file with no geometry column."""
    # Prepare test data
    meta, _, geometry, field_data = read(naturalearth_lowres)
    geometry = None
    meta["geometry_type"] = None

    # Test
    filename = tmp_path / "test.gpkg"
    write(filename, geometry, field_data, driver="GPKG", **meta)

    # Check result: attribute-only table with no CRS/geometry, fields intact
    assert filename.exists()
    meta, _, geometry, fields = read(filename)

    assert meta["crs"] is None
    assert meta["geometry_type"] is None
    assert meta["encoding"] == "UTF-8"
    assert meta["fields"].shape == (5,)

    assert meta["fields"].tolist() == [
        "pop_est",
        "continent",
        "name",
        "iso_a3",
        "gdp_md_est",
    ]

    assert len(fields) == 5
    assert len(fields[0]) == 177
569
+
570
+
571
def test_write_no_geom_data(tmp_path, naturalearth_lowres):
    """Test writing file with no geometry data passed but a geometry_type specified.

    In this case the geometry_type is ignored, so a file without geometry column is
    written.
    """
    # Prepare test data
    meta, _, geometry, field_data = read(naturalearth_lowres)
    # If geometry data is set to None, meta["geometry_type"] is ignored and so no
    # geometry column will be created.
    geometry = None

    # Test
    filename = tmp_path / "test.gpkg"
    write(filename, geometry, field_data, driver="GPKG", **meta)

    # Check result: geometry_type from meta was ignored, no geometry written
    assert filename.exists()
    result_meta, _, result_geometry, result_field_data = read(filename)

    assert result_meta["crs"] is None
    assert result_meta["geometry_type"] is None
    assert result_meta["encoding"] == "UTF-8"
    assert result_meta["fields"].shape == (5,)

    assert result_meta["fields"].tolist() == [
        "pop_est",
        "continent",
        "name",
        "iso_a3",
        "gdp_md_est",
    ]

    assert len(result_field_data) == 5
    assert len(result_field_data[0]) == 177
    assert result_geometry is None
607
+
608
+
609
def test_write_no_geom_no_fields():
    """Writing with neither geometry nor fields raises ValueError."""
    expected_msg = "You must provide at least a geometry column or a field"
    with pytest.raises(ValueError, match=expected_msg):
        write("test.gpkg", geometry=None, field_data=None, fields=None)
616
+
617
+
618
@pytest.mark.skipif(
    __gdal_version__ < (3, 6, 0),
    reason="OpenFileGDB write support only available for GDAL >= 3.6.0",
)
@pytest.mark.parametrize(
    "write_int64",
    [
        False,
        pytest.param(
            True,
            marks=pytest.mark.skipif(
                __gdal_version__ < (3, 9, 0),
                reason="OpenFileGDB write support for int64 values for GDAL >= 3.9.0",
            ),
        ),
    ],
)
def test_write_openfilegdb(tmp_path, write_int64):
    """Round-trip numeric fields through the OpenFileGDB driver.

    With write_int64=True the layer creation option enabling 64-bit integer
    support is passed; otherwise int64 columns degrade to float64.
    """
    # Point(0, 0)
    expected_geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 3, dtype=object
    )
    expected_field_data = [
        np.array([True, False, True], dtype="bool"),
        np.array([1, 2, 3], dtype="int16"),
        np.array([1, 2, 3], dtype="int32"),
        np.array([1, 2, 3], dtype="int64"),
        np.array([1, 2, 3], dtype="float32"),
        np.array([1, 2, 3], dtype="float64"),
    ]
    expected_fields = ["bool", "int16", "int32", "int64", "float32", "float64"]
    expected_meta = {
        "geometry_type": "Point",
        "crs": "EPSG:4326",
        "fields": expected_fields,
    }

    filename = tmp_path / "test.gdb"

    # int64 is not supported without additional config: https://gdal.org/en/latest/drivers/vector/openfilegdb.html#bit-integer-field-support
    # it is converted to float64 by default and raises a warning
    # (for GDAL >= 3.9.0 only)
    write_params = (
        {"TARGET_ARCGIS_VERSION": "ARCGIS_PRO_3_2_OR_LATER"} if write_int64 else {}
    )

    if write_int64 or __gdal_version__ < (3, 9, 0):
        ctx = contextlib.nullcontext()
    else:
        ctx = pytest.warns(
            RuntimeWarning, match="Integer64 will be written as a Float64"
        )

    with ctx:
        write(
            filename,
            expected_geometry,
            expected_field_data,
            driver="OpenFileGDB",
            **expected_meta,
            **write_params,
        )

    meta, _, geometry, field_data = read(filename)

    # without TARGET_ARCGIS_VERSION, int64 comes back as float64
    if not write_int64:
        expected_field_data[3] = expected_field_data[3].astype("float64")

    # bool types are converted to int32
    expected_field_data[0] = expected_field_data[0].astype("int32")

    assert meta["crs"] == expected_meta["crs"]
    assert np.array_equal(meta["fields"], expected_meta["fields"])

    assert np.array_equal(geometry, expected_geometry)
    for i in range(len(expected_field_data)):
        assert field_data[i].dtype == expected_field_data[i].dtype
        assert np.array_equal(field_data[i], expected_field_data[i])
696
+
697
+
698
@pytest.mark.parametrize("ext", DRIVERS)
def test_write_append(tmp_path, naturalearth_lowres, ext):
    """Appending the same records doubles the feature count for each driver."""
    if ext == ".fgb" and __gdal_version__ <= (3, 5, 0):
        pytest.skip("Append to FlatGeobuf fails for GDAL <= 3.5.0")

    if ext in (".geojsonl", ".geojsons") and __gdal_version__ < (3, 6, 0):
        pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")

    meta, _, geometry, field_data = read(naturalearth_lowres)

    # coerce output layer to MultiPolygon to avoid mixed type errors
    meta["geometry_type"] = "MultiPolygon"

    filename = tmp_path / f"test{ext}"
    write(filename, geometry, field_data, **meta)

    assert filename.exists()

    assert read_info(filename)["features"] == 177

    # write the same records again
    write(filename, geometry, field_data, append=True, **meta)

    assert read_info(filename)["features"] == 354
722
+
723
+
724
@pytest.mark.parametrize("driver,ext", [("GML", ".gml"), ("GeoJSONSeq", ".geojsons")])
def test_write_append_unsupported(tmp_path, naturalearth_lowres, driver, ext):
    """Appending with drivers lacking append support raises DataSourceError."""
    if ext == ".geojsons" and __gdal_version__ >= (3, 6, 0):
        pytest.skip("Append to GeoJSONSeq supported for GDAL >= 3.6.0")

    meta, _, geometry, field_data = read(naturalearth_lowres)

    # GML does not support append functionality
    filename = tmp_path / f"test{ext}"
    write(filename, geometry, field_data, driver=driver, **meta)

    assert filename.exists()

    # force_feature_count: these drivers do not expose a cheap feature count
    assert read_info(filename, force_feature_count=True)["features"] == 177

    with pytest.raises(DataSourceError):
        write(filename, geometry, field_data, driver=driver, append=True, **meta)
741
+
742
+
743
@pytest.mark.skipif(
    __gdal_version__ > (3, 5, 0),
    reason="segfaults on FlatGeobuf limited to GDAL <= 3.5.0",
)
def test_write_append_prevent_gdal_segfault(tmp_path, naturalearth_lowres):
    """GDAL <= 3.5.0 segfaults when appending to FlatGeobuf; this test
    verifies that we catch that before segfault"""
    meta, _, geometry, field_data = read(naturalearth_lowres)
    meta["geometry_type"] = "MultiPolygon"

    filename = tmp_path / "test.fgb"
    write(filename, geometry, field_data, **meta)

    assert filename.exists()

    # pyogrio must refuse the append itself instead of letting GDAL crash
    with pytest.raises(
        RuntimeError,  # match="append to FlatGeobuf is not supported for GDAL <= 3.5.0"
    ):
        write(filename, geometry, field_data, append=True, **meta)
762
+
763
+
764
@pytest.mark.parametrize(
    "driver",
    {
        driver
        for driver in DRIVERS.values()
        if driver not in ("ESRI Shapefile", "GPKG", "GeoJSON")
    },
)
def test_write_supported(tmp_path, naturalearth_lowres, driver):
    """Test drivers known to work that are not specifically tested above"""
    meta, _, geometry, field_data = read(naturalearth_lowres, columns=["iso_a3"])

    # note: naturalearth_lowres contains mixed polygons / multipolygons, which
    # are not supported in mixed form for all drivers. To get around this here
    # we take the first record only.
    meta["geometry_type"] = "MultiPolygon"

    filename = tmp_path / f"test{DRIVER_EXT[driver]}"
    write(
        filename,
        geometry[:1],
        field_data=[f[:1] for f in field_data],
        driver=driver,
        **meta,
    )

    assert filename.exists()
791
+
792
+
793
@pytest.mark.skipif(
    __gdal_version__ >= (3, 6, 0), reason="OpenFileGDB supports write for GDAL >= 3.6.0"
)
def test_write_unsupported(tmp_path, naturalearth_lowres):
    """Writing with a driver that has no write support raises DataSourceError."""
    meta, _, geometry, field_data = read(naturalearth_lowres)

    filename = tmp_path / "test.gdb"

    with pytest.raises(DataSourceError, match="does not support write functionality"):
        write(filename, geometry, field_data, driver="OpenFileGDB", **meta)
803
+
804
+
805
def test_write_gdalclose_error(naturalearth_lowres):
    """Failures surfaced by GDAL when flushing/closing become DataSourceError."""
    meta, _, geometry, field_data = read(naturalearth_lowres)

    # non-existing bucket: the write only fails when GDAL flushes to S3
    filename = "s3://non-existing-bucket/test.geojson"

    # set config options to avoid errors on open due to GDAL S3 configuration
    set_gdal_config_options(
        {
            "AWS_ACCESS_KEY_ID": "invalid",
            "AWS_SECRET_ACCESS_KEY": "invalid",
            "AWS_NO_SIGN_REQUEST": True,
        }
    )

    with pytest.raises(DataSourceError, match="Failed to write features to dataset"):
        write(filename, geometry, field_data, **meta)
821
+
822
+
823
def assert_equal_result(result1, result2):
    """Assert that two read() result tuples contain equivalent data.

    Geometries are compared via shapely (when available) because WKB byte
    representations may differ slightly between drivers.
    """
    meta1, index1, geometry1, field_data1 = result1
    meta2, index2, geometry2, field_data2 = result2

    assert np.array_equal(meta1["fields"], meta2["fields"])
    assert np.array_equal(index1, index2)
    assert all(np.array_equal(f1, f2) for f1, f2 in zip(field_data1, field_data2))

    if HAS_SHAPELY:
        # a plain `assert np.array_equal(geometry1, geometry2)` doesn't work
        # because the WKB values are not exactly equal, therefore parsing with
        # shapely to compare with tolerance
        assert shapely.equals_exact(
            shapely.from_wkb(geometry1), shapely.from_wkb(geometry2), tolerance=0.00001
        ).all()
838
+
839
+
840
@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")  # TODO
@pytest.mark.parametrize("driver,ext", [("GeoJSON", "geojson"), ("GPKG", "gpkg")])
def test_read_from_bytes(tmp_path, naturalearth_lowres, driver, ext):
    """Reading from an in-memory bytes object matches reading from the file."""
    meta, index, geometry, field_data = read(naturalearth_lowres)
    meta.update({"geometry_type": "Unknown"})
    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, driver=driver, **meta)

    # Path.read_bytes() is the idiomatic replacement for open()/read()/close()
    buffer = filename.read_bytes()

    result2 = read(buffer)
    assert_equal_result((meta, index, geometry, field_data), result2)
853
+
854
+
855
def test_read_from_bytes_zipped(naturalearth_lowres_vsi):
    """Reading raw bytes of a zipped dataset matches reading via the vsi path."""
    # naturalearth_lowres_vsi fixture yields (zip path, /vsizip/ path) — TODO confirm
    path, vsi_path = naturalearth_lowres_vsi
    meta, index, geometry, field_data = read(vsi_path)

    with open(path, "rb") as f:
        buffer = f.read()

    result2 = read(buffer)
    assert_equal_result((meta, index, geometry, field_data), result2)
864
+
865
+
866
@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")  # TODO
@pytest.mark.parametrize("driver,ext", [("GeoJSON", "geojson"), ("GPKG", "gpkg")])
def test_read_from_file_like(tmp_path, naturalearth_lowres, driver, ext):
    """Reading from an open binary file handle matches reading from the path."""
    meta, index, geometry, field_data = read(naturalearth_lowres)
    meta.update({"geometry_type": "Unknown"})
    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, driver=driver, **meta)

    with open(filename, "rb") as f:
        result2 = read(f)

    assert_equal_result((meta, index, geometry, field_data), result2)
878
+
879
+
880
def test_read_from_nonseekable_bytes(nonseekable_bytes):
    """Reading a non-seekable byte stream yields one geometry and no fields."""
    result = read(nonseekable_bytes)
    meta = result[0]
    geometry = result[2]
    assert meta["fields"].shape == (0,)
    assert len(geometry) == 1
884
+
885
+
886
@pytest.mark.parametrize("ext", ["gpkg", "fgb"])
def test_read_write_data_types_numeric(tmp_path, ext):
    """Numeric dtypes either round-trip exactly or are promoted predictably."""
    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 3, dtype=object
    )
    field_data = [
        np.array([True, False, True], dtype="bool"),
        np.array([1, 2, 3], dtype="int16"),
        np.array([1, 2, 3], dtype="int32"),
        np.array([1, 2, 3], dtype="int64"),
        np.array([1, 2, 3], dtype="float32"),
        np.array([1, 2, 3], dtype="float64"),
    ]
    fields = ["bool", "int16", "int32", "int64", "float32", "float64"]
    meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}

    # these dtypes round-trip exactly (values and dtype)
    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, fields, **meta)
    result = read(filename)[3]
    assert all(np.array_equal(f1, f2) for f1, f2 in zip(result, field_data))
    assert all(f1.dtype == f2.dtype for f1, f2 in zip(result, field_data))

    # other integer data types that don't roundtrip exactly
    # these are generally promoted to a larger integer type except for uint64
    for i, (dtype, result_dtype) in enumerate(
        [
            ("int8", "int16"),
            ("uint8", "int16"),
            ("uint16", "int32"),
            ("uint32", "int64"),
            ("uint64", "int64"),
        ]
    ):
        field_data = [np.array([1, 2, 3], dtype=dtype)]
        filename = tmp_path / f"test{i}.{ext}"
        write(filename, geometry, field_data, ["col"], **meta)
        result = read(filename)[3][0]
        assert np.array_equal(result, np.array([1, 2, 3]))
        assert result.dtype == result_dtype
926
+
927
+
928
def test_read_write_datetime(tmp_path):
    """Datetime columns round-trip; GDAL stores at millisecond resolution."""
    field_data = [
        np.array(["2005-02-01", "2005-02-02"], dtype="datetime64[D]"),
        np.array(["2001-01-01T12:00", "2002-02-03T13:56:03"], dtype="datetime64[s]"),
        np.array(
            ["2001-01-01T12:00", "2002-02-03T13:56:03.072"], dtype="datetime64[ms]"
        ),
        np.array(
            ["2001-01-01T12:00", "2002-02-03T13:56:03.072"], dtype="datetime64[ns]"
        ),
        np.array(
            ["2001-01-01T12:00", "2002-02-03T13:56:03.072123456"],
            dtype="datetime64[ns]",
        ),
        # Remark: a None value is automatically converted to np.datetime64("NaT")
        np.array([np.datetime64("NaT"), None], dtype="datetime64[ms]"),
    ]
    fields = [
        "datetime64_d",
        "datetime64_s",
        "datetime64_ms",
        "datetime64_ns",
        "datetime64_precise_ns",
        "datetime64_ms_nat",
    ]

    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 2, dtype=object
    )
    meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}

    filename = tmp_path / "test.gpkg"
    write(filename, geometry, field_data, fields, **meta)
    result = read(filename)[3]
    for idx, field in enumerate(fields):
        if field == "datetime64_precise_ns":
            # gdal rounds datetimes to ms
            assert np.array_equal(result[idx], field_data[idx].astype("datetime64[ms]"))
        else:
            assert np.array_equal(result[idx], field_data[idx], equal_nan=True)
969
+
970
+
971
@pytest.mark.parametrize("ext", ["gpkg", "fgb"])
def test_read_write_int64_large(tmp_path, ext):
    """Values larger than int32 max round-trip as int64."""
    # Test if value > max int32 is correctly written and read.
    # Test introduced to validate https://github.com/geopandas/pyogrio/issues/259
    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 3, dtype=object
    )
    field_data = [np.array([1, 2192502720, -5], dtype="int64")]
    fields = ["overflow_int64"]
    meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}

    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, fields, **meta)
    result = read(filename)[3]
    assert np.array_equal(result, field_data)
    assert result[0].dtype == field_data[0].dtype
988
+
989
+
990
def test_read_data_types_numeric_with_null(test_gpkg_nulls):
    """Integer columns containing nulls are cast to float64; float32 is kept."""
    fields = read(test_gpkg_nulls)[3]

    for i, field in enumerate(fields):
        # last value should be np.nan
        assert np.isnan(field[-1])

        # all integer fields should be cast to float64; float32 should be preserved
        # (column index 9 is the float32 column in the fixture — TODO confirm)
        if i == 9:
            assert field.dtype == "float32"
        else:
            assert field.dtype == "float64"
1002
+
1003
+
1004
def test_read_unsupported_types(list_field_values_file):
    """Unsupported (list) field types are skipped; only the int column is read."""
    # full read: the list-typed field is dropped
    fields = read(list_field_values_file)[3]
    assert len(fields) == 1

    # explicitly selecting the supported column gives the same result
    fields = read(list_field_values_file, columns=["int64"])[3]
    assert len(fields) == 1
1011
+
1012
+
1013
def test_read_datetime_millisecond(datetime_file):
    """Datetimes are read with millisecond resolution."""
    field = read(datetime_file)[3][0]
    assert field.dtype == "datetime64[ms]"
    expected = [
        np.datetime64("2020-01-01 09:00:00.123"),
        np.datetime64("2020-01-01 10:00:00.000"),
    ]
    assert field[0] == expected[0]
    assert field[1] == expected[1]
1018
+
1019
+
1020
def test_read_unsupported_ext(tmp_path):
    """Reading a file with an unrecognized extension raises a helpful error.

    GDAL cannot infer a driver from the ".unsupported" extension, so read()
    must fail and point the user at the "<DRIVER>:" path-prefix workaround.
    """
    test_unsupported_path = tmp_path / "test.unsupported"
    # Path.write_text replaces the manual open()/write()/write() sequence
    test_unsupported_path.write_text("column1,column2\ndata1,data2")

    with pytest.raises(
        DataSourceError, match=".* by prefixing the file path with '<DRIVER>:'.*"
    ):
        read(test_unsupported_path)
1030
+
1031
+
1032
def test_read_unsupported_ext_with_prefix(tmp_path):
    """A "<DRIVER>:" path prefix lets GDAL open an unrecognized extension.

    The file content is valid CSV, so prefixing the path with "CSV:" makes it
    readable even though the extension is unknown.
    """
    test_unsupported_path = tmp_path / "test.unsupported"
    # Path.write_text replaces the manual open()/write()/write() sequence
    test_unsupported_path.write_text("column1,column2\ndata1,data2")

    _, _, _, field_data = read(f"CSV:{test_unsupported_path}")
    assert len(field_data) == 2
    assert field_data[0] == "data1"
1041
+
1042
+
1043
def test_read_datetime_as_string(datetime_tz_file):
    """datetime_as_string=True returns GDAL's raw strings incl. UTC offset."""
    field = read(datetime_tz_file)[3][0]
    assert field.dtype == "datetime64[ms]"
    # timezone is ignored in numpy layer
    assert field[0] == np.datetime64("2020-01-01 09:00:00.123")
    assert field[1] == np.datetime64("2020-01-01 10:00:00.000")

    field = read(datetime_tz_file, datetime_as_string=True)[3][0]
    assert field.dtype == "object"
    # GDAL doesn't return strings in ISO format (yet)
    assert field[0] == "2020/01/01 09:00:00.123-05"
    assert field[1] == "2020/01/01 10:00:00-05"
1055
+
1056
+
1057
@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
def test_read_write_null_geometry(tmp_path, ext):
    """A null geometry round-trips alongside a real point geometry."""
    # Point(0, 0), null
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000"), None],
        dtype=object,
    )
    field_data = [np.array([1, 2], dtype="int32")]
    fields = ["col"]
    meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
    if ext == "gpkg":
        meta["spatial_index"] = False

    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, fields, **meta)
    result_geometry, result_fields = read(filename)[2:]
    assert np.array_equal(result_geometry, geometry)
    assert np.array_equal(result_fields[0], field_data[0])
1075
+
1076
+
1077
@pytest.mark.parametrize("dtype", ["float32", "float64"])
def test_write_float_nan_null(tmp_path, dtype):
    """NaN handling on write: null by default, skipped or literal NaN otherwise."""
    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 2,
        dtype=object,
    )
    field_data = [np.array([1.5, np.nan], dtype=dtype)]
    fields = ["col"]
    meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
    filename = tmp_path / "test.geojson"

    # default nan_as_null=True
    write(filename, geometry, field_data, fields, **meta)
    with open(filename) as f:
        content = f.read()
    assert '{ "col": null }' in content

    # set to False
    # by default, GDAL will skip the property for GeoJSON if the value is NaN
    if dtype == "float32":
        ctx = pytest.warns(RuntimeWarning, match="NaN of Infinity value found. Skipped")
    else:
        ctx = contextlib.nullcontext()
    with ctx:
        write(filename, geometry, field_data, fields, **meta, nan_as_null=False)
    with open(filename) as f:
        content = f.read()
    assert '"properties": { }' in content

    # but can instruct GDAL to write NaN to json
    write(
        filename,
        geometry,
        field_data,
        fields,
        **meta,
        nan_as_null=False,
        WRITE_NON_FINITE_VALUES="YES",
    )
    with open(filename) as f:
        content = f.read()
    assert '{ "col": NaN }' in content
1120
+
1121
+
1122
@requires_pyarrow_api
@pytest.mark.skipif(
    "Arrow" not in list_drivers(), reason="Arrow driver is not available"
)
def test_write_float_nan_null_arrow(tmp_path):
    """NaN round-trip through the Arrow driver: null by default, NaN otherwise."""
    import pyarrow.feather

    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 2,
        dtype=object,
    )
    field_data = [np.array([1.5, np.nan], dtype="float64")]
    fields = ["col"]
    meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
    fname = tmp_path / "test.arrow"

    # default nan_as_null=True: NaN becomes an Arrow null
    write(fname, geometry, field_data, fields, driver="Arrow", **meta)
    table = pyarrow.feather.read_table(fname)
    assert table["col"].is_null().to_pylist() == [False, True]

    # set to False: NaN is preserved as a non-null float NaN
    write(
        fname, geometry, field_data, fields, driver="Arrow", nan_as_null=False, **meta
    )
    table = pyarrow.feather.read_table(fname)
    assert table["col"].is_null().to_pylist() == [False, False]
    pc = pytest.importorskip("pyarrow.compute")
    assert pc.is_nan(table["col"]).to_pylist() == [False, True]
1152
+
1153
+
1154
@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory(naturalearth_lowres, driver):
    """Write to a BytesIO buffer and read the same data back."""
    meta, _, geometry, field_data = read(naturalearth_lowres)
    meta.update({"geometry_type": "MultiPolygon"})

    buffer = BytesIO()
    write(buffer, geometry, field_data, driver=driver, layer="test", **meta)

    assert len(buffer.getbuffer()) > 0
    assert list_layers(buffer)[0][0] == "test"

    actual_meta, _, actual_geometry, actual_field_data = read(buffer)

    assert np.array_equal(actual_meta["fields"], meta["fields"])
    assert np.array_equal(actual_field_data, field_data)
    assert len(actual_geometry) == len(geometry)
1171
+
1172
+
1173
def test_write_memory_driver_required(naturalearth_lowres):
    """Writing to an in-memory buffer without a driver must raise ValueError."""
    meta, _, geometry, field_data = read(naturalearth_lowres)

    buffer = BytesIO()
    expected = "driver must be provided to write to in-memory file"
    with pytest.raises(ValueError, match=expected):
        write(buffer, geometry, field_data, driver=None, layer="test", **meta)
1182
+
1183
+
1184
@pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
    """Drivers that require real files refuse to write to an in-memory buffer."""
    if driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
        pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")

    meta, _, geometry, field_data = read(naturalearth_lowres)

    buffer = BytesIO()

    with pytest.raises(
        ValueError, match=f"writing to in-memory file is not supported for {driver}"
    ):
        write(
            buffer,
            geometry,
            field_data,
            driver=driver,
            layer="test",
            append=True,
            **meta,
        )
1205
+
1206
+
1207
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory_append_unsupported(naturalearth_lowres, driver):
    """Appending to an in-memory file is not implemented and must raise."""
    meta, _, geometry, field_data = read(naturalearth_lowres)
    meta.update({"geometry_type": "MultiPolygon"})

    buffer = BytesIO()

    with pytest.raises(
        NotImplementedError, match="append is not supported for in-memory files"
    ):
        write(
            buffer,
            geometry,
            field_data,
            driver=driver,
            layer="test",
            append=True,
            **meta,
        )
1226
+
1227
+
1228
def test_write_memory_existing_unsupported(naturalearth_lowres):
    """Writing into a BytesIO that already holds data is not supported."""
    meta, _, geometry, field_data = read(naturalearth_lowres)

    non_empty = BytesIO(b"0000")
    with pytest.raises(
        NotImplementedError,
        match="writing to existing in-memory object is not supported",
    ):
        write(non_empty, geometry, field_data, driver="GeoJSON", layer="test", **meta)
1237
+
1238
+
1239
def test_write_open_file_handle(tmp_path, naturalearth_lowres):
    """Verify that writing to an open file handle is not currently supported"""

    meta, _, geometry, field_data = read(naturalearth_lowres)

    # verify it fails for regular file handle
    with pytest.raises(
        NotImplementedError, match="writing to an open file handle is not yet supported"
    ):
        with open(tmp_path / "test.geojson", "wb") as f:
            write(f, geometry, field_data, driver="GeoJSON", layer="test", **meta)

    # verify it fails for ZipFile
    with pytest.raises(
        NotImplementedError, match="writing to an open file handle is not yet supported"
    ):
        with ZipFile(tmp_path / "test.geojson.zip", "w") as z:
            with z.open("test.geojson", "w") as f:
                write(f, geometry, field_data, driver="GeoJSON", layer="test", **meta)
1258
+
1259
+
1260
@pytest.mark.parametrize("ext", ["fgb", "gpkg", "geojson"])
@pytest.mark.parametrize(
    "read_encoding,write_encoding",
    [
        pytest.param(
            None,
            None,
            marks=pytest.mark.skipif(
                sys.platform == "win32", reason="must specify write encoding on Windows"
            ),
        ),
        pytest.param(
            "UTF-8",
            None,
            marks=pytest.mark.skipif(
                sys.platform == "win32", reason="must specify write encoding on Windows"
            ),
        ),
        (None, "UTF-8"),
        ("UTF-8", "UTF-8"),
    ],
)
def test_encoding_io(tmp_path, ext, read_encoding, write_encoding):
    """UTF-8 field names and values round-trip across drivers and encodings."""
    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
    )
    arabic = "العربية"
    cree = "ᓀᐦᐃᔭᐍᐏᐣ"
    mandarin = "中文"
    field_data = [
        np.array([arabic], dtype=object),
        np.array([cree], dtype=object),
        np.array([mandarin], dtype=object),
    ]
    # non-ASCII text is used both as field names and as field values
    fields = [arabic, cree, mandarin]
    meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": write_encoding}

    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, fields, **meta)

    actual_meta, _, _, actual_field_data = read(filename, encoding=read_encoding)
    assert np.array_equal(fields, actual_meta["fields"])
    assert np.array_equal(field_data, actual_field_data)
    assert np.array_equal(fields, read_info(filename, encoding=read_encoding)["fields"])
1305
+
1306
+
1307
@pytest.mark.parametrize(
    "read_encoding,write_encoding",
    [
        pytest.param(
            None,
            None,
            marks=pytest.mark.skipif(
                sys.platform == "win32", reason="must specify write encoding on Windows"
            ),
        ),
        pytest.param(
            "UTF-8",
            None,
            marks=pytest.mark.skipif(
                sys.platform == "win32", reason="must specify write encoding on Windows"
            ),
        ),
        (None, "UTF-8"),
        ("UTF-8", "UTF-8"),
    ],
)
def test_encoding_io_shapefile(tmp_path, read_encoding, write_encoding):
    """UTF-8 round-trip for shapefiles, with and without the .cpg sidecar."""
    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
    )
    arabic = "العربية"
    cree = "ᓀᐦᐃᔭᐍᐏᐣ"
    mandarin = "中文"
    field_data = [
        np.array([arabic], dtype=object),
        np.array([cree], dtype=object),
        np.array([mandarin], dtype=object),
    ]

    # Field names are longer than 10 bytes and get truncated badly (not at UTF-8
    # character level) by GDAL when output to shapefile, so we have to truncate
    # before writing
    fields = [arabic[:5], cree[:3], mandarin]
    meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": "UTF-8"}

    filename = tmp_path / "test.shp"
    # NOTE: GDAL automatically creates a cpg file with the encoding name, which
    # means that if we read this without specifying the encoding it uses the
    # correct one
    write(filename, geometry, field_data, fields, **meta)

    actual_meta, _, _, actual_field_data = read(filename, encoding=read_encoding)
    assert np.array_equal(fields, actual_meta["fields"])
    assert np.array_equal(field_data, actual_field_data)
    assert np.array_equal(fields, read_info(filename, encoding=read_encoding)["fields"])

    # verify that if cpg file is not present, that user-provided encoding is used,
    # otherwise it defaults to ISO-8859-1
    if read_encoding is not None:
        filename.with_suffix(".cpg").unlink()
        actual_meta, _, _, actual_field_data = read(filename, encoding=read_encoding)
        assert np.array_equal(fields, actual_meta["fields"])
        assert np.array_equal(field_data, actual_field_data)
        assert np.array_equal(
            fields, read_info(filename, encoding=read_encoding)["fields"]
        )
1369
+
1370
+
1371
@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
    """Verify that we write non-UTF data to the data source

    IMPORTANT: this may not be valid for the data source and will likely render
    them unusable in other tools, but should successfully roundtrip unless we
    disable writing using other encodings.

    NOTE: FlatGeobuf driver cannot handle non-UTF data in GDAL >= 3.9
    """
    encoding, text = encoded_text

    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
    )

    field_data = [np.array([text], dtype=object)]

    # the non-UTF text serves as both field name and field value
    fields = [text]
    meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": encoding}

    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, fields, **meta)

    # cannot open these files without specifying encoding
    with pytest.raises(UnicodeDecodeError):
        read(filename)

    with pytest.raises(UnicodeDecodeError):
        read_info(filename)

    # must provide encoding to read these properly
    actual_meta, _, _, actual_field_data = read(filename, encoding=encoding)
    assert actual_meta["fields"][0] == text
    assert actual_field_data[0] == text
    assert read_info(filename, encoding=encoding)["fields"][0] == text
1408
+
1409
+
1410
def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text):
    """Non-UTF-8 shapefile round-trip: cpg sidecar vs explicit user encoding."""
    encoding, text = encoded_text

    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
    )

    field_data = [np.array([text], dtype=object)]

    fields = [text]
    meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": encoding}

    filename = tmp_path / "test.shp"
    write(filename, geometry, field_data, fields, **meta)

    # NOTE: GDAL automatically creates a cpg file with the encoding name, which
    # means that if we read this without specifying the encoding it uses the
    # correct one
    actual_meta, _, _, actual_field_data = read(filename)
    assert actual_meta["fields"][0] == text
    assert actual_field_data[0] == text
    assert read_info(filename)["fields"][0] == text

    # verify that if cpg file is not present, that user-provided encoding must be used
    filename.with_suffix(".cpg").unlink()

    # We will assume ISO-8859-1, which is wrong
    miscoded = text.encode(encoding).decode("ISO-8859-1")
    bad_meta, _, _, bad_field_data = read(filename)
    assert bad_meta["fields"][0] == miscoded
    assert bad_field_data[0] == miscoded
    assert read_info(filename)["fields"][0] == miscoded

    # If encoding is provided, that should yield correct text
    actual_meta, _, _, actual_field_data = read(filename, encoding=encoding)
    assert actual_meta["fields"][0] == text
    assert actual_field_data[0] == text
    assert read_info(filename, encoding=encoding)["fields"][0] == text

    # verify that setting encoding does not corrupt SHAPE_ENCODING option if set
    # globally (it is ignored during read when encoding is specified by user)
    try:
        set_gdal_config_options({"SHAPE_ENCODING": "CP1254"})
        _ = read(filename, encoding=encoding)
        assert get_gdal_config_option("SHAPE_ENCODING") == "CP1254"

    finally:
        # reset to clear between tests
        set_gdal_config_options({"SHAPE_ENCODING": None})
1460
+
1461
+
1462
def test_write_with_mask(tmp_path):
    """Masked field values are written as null; invalid masks raise ValueError."""
    # three Point(0, 0) geometries
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 3,
        dtype=object,
    )
    field_data = [np.array([1, 2, 3], dtype="int32")]
    field_mask = [np.array([False, True, False])]
    fields = ["col"]
    meta = {"geometry_type": "Point", "crs": "EPSG:4326"}

    filename = tmp_path / "test.geojson"
    write(filename, geometry, field_data, fields, field_mask, **meta)
    result_geometry, result_fields = read(filename)[2:]
    assert np.array_equal(result_geometry, geometry)
    # the masked middle value reads back as NaN (null in GeoJSON)
    np.testing.assert_allclose(result_fields[0], np.array([1, np.nan, 3]))

    # wrong length for mask
    field_mask = [np.array([False, True])]
    with pytest.raises(ValueError):
        write(filename, geometry, field_data, fields, field_mask, **meta)

    # wrong number of mask arrays
    field_mask = [np.array([False, True, False])] * 2
    with pytest.raises(ValueError):
        write(filename, geometry, field_data, fields, field_mask, **meta)
1488
+
1489
+
1490
@requires_arrow_api
def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres):
    """The reader exposes a valid Arrow C stream capsule without pyarrow."""
    # this test is included here instead of test_arrow.py to ensure we also run
    # it when pyarrow is not installed

    with open_arrow(naturalearth_lowres) as (meta, reader):
        assert isinstance(meta, dict)
        assert isinstance(reader, pyogrio._io._ArrowStream)
        capsule = reader.__arrow_c_stream__()
        # validate the PyCapsule name mandated by the Arrow PyCapsule interface
        assert (
            ctypes.pythonapi.PyCapsule_IsValid(
                ctypes.py_object(capsule), b"arrow_array_stream"
            )
            == 1
        )
1505
+
1506
+
1507
@pytest.mark.skipif(HAS_PYARROW, reason="pyarrow is installed")
@requires_arrow_api
def test_open_arrow_error_no_pyarrow(naturalearth_lowres):
    """use_pyarrow=True must raise ImportError when pyarrow is absent."""
    # this test is included here instead of test_arrow.py to ensure we run
    # it when pyarrow is not installed

    with pytest.raises(ImportError):
        with open_arrow(naturalearth_lowres, use_pyarrow=True) as _:
            pass