pyogrio-0.9.0-cp38-cp38-macosx_12_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pyogrio might be problematic.

Files changed (239)
  1. pyogrio/.dylibs/libgdal.34.3.8.5.dylib +0 -0
  2. pyogrio/__init__.py +48 -0
  3. pyogrio/_compat.py +41 -0
  4. pyogrio/_env.py +61 -0
  5. pyogrio/_err.cpython-38-darwin.so +0 -0
  6. pyogrio/_err.pxd +4 -0
  7. pyogrio/_err.pyx +250 -0
  8. pyogrio/_geometry.cpython-38-darwin.so +0 -0
  9. pyogrio/_geometry.pxd +4 -0
  10. pyogrio/_geometry.pyx +129 -0
  11. pyogrio/_io.cpython-38-darwin.so +0 -0
  12. pyogrio/_io.pxd +0 -0
  13. pyogrio/_io.pyx +2742 -0
  14. pyogrio/_ogr.cpython-38-darwin.so +0 -0
  15. pyogrio/_ogr.pxd +444 -0
  16. pyogrio/_ogr.pyx +346 -0
  17. pyogrio/_version.py +21 -0
  18. pyogrio/_vsi.cpython-38-darwin.so +0 -0
  19. pyogrio/_vsi.pxd +4 -0
  20. pyogrio/_vsi.pyx +140 -0
  21. pyogrio/arrow_bridge.h +115 -0
  22. pyogrio/core.py +320 -0
  23. pyogrio/errors.py +32 -0
  24. pyogrio/gdal_data/GDAL-targets-release.cmake +19 -0
  25. pyogrio/gdal_data/GDAL-targets.cmake +105 -0
  26. pyogrio/gdal_data/GDALConfig.cmake +25 -0
  27. pyogrio/gdal_data/GDALConfigVersion.cmake +85 -0
  28. pyogrio/gdal_data/GDALLogoBW.svg +138 -0
  29. pyogrio/gdal_data/GDALLogoColor.svg +126 -0
  30. pyogrio/gdal_data/GDALLogoGS.svg +126 -0
  31. pyogrio/gdal_data/LICENSE.TXT +467 -0
  32. pyogrio/gdal_data/bag_template.xml +201 -0
  33. pyogrio/gdal_data/copyright +467 -0
  34. pyogrio/gdal_data/cubewerx_extra.wkt +48 -0
  35. pyogrio/gdal_data/default.rsc +0 -0
  36. pyogrio/gdal_data/ecw_cs.wkt +1453 -0
  37. pyogrio/gdal_data/eedaconf.json +23 -0
  38. pyogrio/gdal_data/epsg.wkt +1 -0
  39. pyogrio/gdal_data/esri_StatePlane_extra.wkt +631 -0
  40. pyogrio/gdal_data/gdalicon.png +0 -0
  41. pyogrio/gdal_data/gdalinfo_output.schema.json +346 -0
  42. pyogrio/gdal_data/gdalmdiminfo_output.schema.json +321 -0
  43. pyogrio/gdal_data/gdalvrt.xsd +772 -0
  44. pyogrio/gdal_data/gfs.xsd +246 -0
  45. pyogrio/gdal_data/gml_registry.xml +117 -0
  46. pyogrio/gdal_data/gml_registry.xsd +66 -0
  47. pyogrio/gdal_data/gmlasconf.xml +169 -0
  48. pyogrio/gdal_data/gmlasconf.xsd +1066 -0
  49. pyogrio/gdal_data/grib2_center.csv +251 -0
  50. pyogrio/gdal_data/grib2_process.csv +102 -0
  51. pyogrio/gdal_data/grib2_subcenter.csv +63 -0
  52. pyogrio/gdal_data/grib2_table_4_2_0_0.csv +261 -0
  53. pyogrio/gdal_data/grib2_table_4_2_0_1.csv +261 -0
  54. pyogrio/gdal_data/grib2_table_4_2_0_13.csv +261 -0
  55. pyogrio/gdal_data/grib2_table_4_2_0_14.csv +261 -0
  56. pyogrio/gdal_data/grib2_table_4_2_0_15.csv +261 -0
  57. pyogrio/gdal_data/grib2_table_4_2_0_16.csv +261 -0
  58. pyogrio/gdal_data/grib2_table_4_2_0_17.csv +11 -0
  59. pyogrio/gdal_data/grib2_table_4_2_0_18.csv +261 -0
  60. pyogrio/gdal_data/grib2_table_4_2_0_19.csv +261 -0
  61. pyogrio/gdal_data/grib2_table_4_2_0_190.csv +261 -0
  62. pyogrio/gdal_data/grib2_table_4_2_0_191.csv +261 -0
  63. pyogrio/gdal_data/grib2_table_4_2_0_2.csv +261 -0
  64. pyogrio/gdal_data/grib2_table_4_2_0_20.csv +261 -0
  65. pyogrio/gdal_data/grib2_table_4_2_0_21.csv +261 -0
  66. pyogrio/gdal_data/grib2_table_4_2_0_3.csv +261 -0
  67. pyogrio/gdal_data/grib2_table_4_2_0_4.csv +261 -0
  68. pyogrio/gdal_data/grib2_table_4_2_0_5.csv +261 -0
  69. pyogrio/gdal_data/grib2_table_4_2_0_6.csv +261 -0
  70. pyogrio/gdal_data/grib2_table_4_2_0_7.csv +261 -0
  71. pyogrio/gdal_data/grib2_table_4_2_10_0.csv +261 -0
  72. pyogrio/gdal_data/grib2_table_4_2_10_1.csv +261 -0
  73. pyogrio/gdal_data/grib2_table_4_2_10_191.csv +261 -0
  74. pyogrio/gdal_data/grib2_table_4_2_10_2.csv +261 -0
  75. pyogrio/gdal_data/grib2_table_4_2_10_3.csv +261 -0
  76. pyogrio/gdal_data/grib2_table_4_2_10_4.csv +261 -0
  77. pyogrio/gdal_data/grib2_table_4_2_1_0.csv +261 -0
  78. pyogrio/gdal_data/grib2_table_4_2_1_1.csv +261 -0
  79. pyogrio/gdal_data/grib2_table_4_2_1_2.csv +261 -0
  80. pyogrio/gdal_data/grib2_table_4_2_20_0.csv +261 -0
  81. pyogrio/gdal_data/grib2_table_4_2_20_1.csv +261 -0
  82. pyogrio/gdal_data/grib2_table_4_2_20_2.csv +261 -0
  83. pyogrio/gdal_data/grib2_table_4_2_2_0.csv +261 -0
  84. pyogrio/gdal_data/grib2_table_4_2_2_3.csv +261 -0
  85. pyogrio/gdal_data/grib2_table_4_2_2_4.csv +261 -0
  86. pyogrio/gdal_data/grib2_table_4_2_2_5.csv +261 -0
  87. pyogrio/gdal_data/grib2_table_4_2_2_6.csv +261 -0
  88. pyogrio/gdal_data/grib2_table_4_2_3_0.csv +261 -0
  89. pyogrio/gdal_data/grib2_table_4_2_3_1.csv +261 -0
  90. pyogrio/gdal_data/grib2_table_4_2_3_2.csv +28 -0
  91. pyogrio/gdal_data/grib2_table_4_2_3_3.csv +8 -0
  92. pyogrio/gdal_data/grib2_table_4_2_3_4.csv +14 -0
  93. pyogrio/gdal_data/grib2_table_4_2_3_5.csv +11 -0
  94. pyogrio/gdal_data/grib2_table_4_2_3_6.csv +11 -0
  95. pyogrio/gdal_data/grib2_table_4_2_4_0.csv +261 -0
  96. pyogrio/gdal_data/grib2_table_4_2_4_1.csv +261 -0
  97. pyogrio/gdal_data/grib2_table_4_2_4_10.csv +261 -0
  98. pyogrio/gdal_data/grib2_table_4_2_4_2.csv +261 -0
  99. pyogrio/gdal_data/grib2_table_4_2_4_3.csv +261 -0
  100. pyogrio/gdal_data/grib2_table_4_2_4_4.csv +261 -0
  101. pyogrio/gdal_data/grib2_table_4_2_4_5.csv +261 -0
  102. pyogrio/gdal_data/grib2_table_4_2_4_6.csv +261 -0
  103. pyogrio/gdal_data/grib2_table_4_2_4_7.csv +261 -0
  104. pyogrio/gdal_data/grib2_table_4_2_4_8.csv +261 -0
  105. pyogrio/gdal_data/grib2_table_4_2_4_9.csv +261 -0
  106. pyogrio/gdal_data/grib2_table_4_2_local_Canada.csv +5 -0
  107. pyogrio/gdal_data/grib2_table_4_2_local_HPC.csv +2 -0
  108. pyogrio/gdal_data/grib2_table_4_2_local_MRMS.csv +175 -0
  109. pyogrio/gdal_data/grib2_table_4_2_local_NCEP.csv +401 -0
  110. pyogrio/gdal_data/grib2_table_4_2_local_NDFD.csv +38 -0
  111. pyogrio/gdal_data/grib2_table_4_2_local_index.csv +7 -0
  112. pyogrio/gdal_data/grib2_table_4_5.csv +261 -0
  113. pyogrio/gdal_data/grib2_table_versions.csv +3 -0
  114. pyogrio/gdal_data/gt_datum.csv +229 -0
  115. pyogrio/gdal_data/gt_ellips.csv +24 -0
  116. pyogrio/gdal_data/header.dxf +1124 -0
  117. pyogrio/gdal_data/inspire_cp_BasicPropertyUnit.gfs +57 -0
  118. pyogrio/gdal_data/inspire_cp_CadastralBoundary.gfs +60 -0
  119. pyogrio/gdal_data/inspire_cp_CadastralParcel.gfs +81 -0
  120. pyogrio/gdal_data/inspire_cp_CadastralZoning.gfs +161 -0
  121. pyogrio/gdal_data/jpfgdgml_AdmArea.gfs +59 -0
  122. pyogrio/gdal_data/jpfgdgml_AdmBdry.gfs +49 -0
  123. pyogrio/gdal_data/jpfgdgml_AdmPt.gfs +59 -0
  124. pyogrio/gdal_data/jpfgdgml_BldA.gfs +54 -0
  125. pyogrio/gdal_data/jpfgdgml_BldL.gfs +54 -0
  126. pyogrio/gdal_data/jpfgdgml_Cntr.gfs +54 -0
  127. pyogrio/gdal_data/jpfgdgml_CommBdry.gfs +49 -0
  128. pyogrio/gdal_data/jpfgdgml_CommPt.gfs +59 -0
  129. pyogrio/gdal_data/jpfgdgml_Cstline.gfs +54 -0
  130. pyogrio/gdal_data/jpfgdgml_ElevPt.gfs +54 -0
  131. pyogrio/gdal_data/jpfgdgml_GCP.gfs +94 -0
  132. pyogrio/gdal_data/jpfgdgml_LeveeEdge.gfs +49 -0
  133. pyogrio/gdal_data/jpfgdgml_RailCL.gfs +54 -0
  134. pyogrio/gdal_data/jpfgdgml_RdASL.gfs +44 -0
  135. pyogrio/gdal_data/jpfgdgml_RdArea.gfs +54 -0
  136. pyogrio/gdal_data/jpfgdgml_RdCompt.gfs +59 -0
  137. pyogrio/gdal_data/jpfgdgml_RdEdg.gfs +59 -0
  138. pyogrio/gdal_data/jpfgdgml_RdMgtBdry.gfs +49 -0
  139. pyogrio/gdal_data/jpfgdgml_RdSgmtA.gfs +59 -0
  140. pyogrio/gdal_data/jpfgdgml_RvrMgtBdry.gfs +49 -0
  141. pyogrio/gdal_data/jpfgdgml_SBAPt.gfs +49 -0
  142. pyogrio/gdal_data/jpfgdgml_SBArea.gfs +54 -0
  143. pyogrio/gdal_data/jpfgdgml_SBBdry.gfs +44 -0
  144. pyogrio/gdal_data/jpfgdgml_WA.gfs +54 -0
  145. pyogrio/gdal_data/jpfgdgml_WL.gfs +54 -0
  146. pyogrio/gdal_data/jpfgdgml_WStrA.gfs +54 -0
  147. pyogrio/gdal_data/jpfgdgml_WStrL.gfs +54 -0
  148. pyogrio/gdal_data/netcdf_config.xsd +143 -0
  149. pyogrio/gdal_data/nitf_spec.xml +3306 -0
  150. pyogrio/gdal_data/nitf_spec.xsd +189 -0
  151. pyogrio/gdal_data/ogrinfo_output.schema.json +505 -0
  152. pyogrio/gdal_data/ogrvrt.xsd +543 -0
  153. pyogrio/gdal_data/osmconf.ini +132 -0
  154. pyogrio/gdal_data/ozi_datum.csv +131 -0
  155. pyogrio/gdal_data/ozi_ellips.csv +35 -0
  156. pyogrio/gdal_data/pci_datum.txt +463 -0
  157. pyogrio/gdal_data/pci_ellips.txt +77 -0
  158. pyogrio/gdal_data/pdfcomposition.xsd +721 -0
  159. pyogrio/gdal_data/pds4_template.xml +65 -0
  160. pyogrio/gdal_data/plscenesconf.json +1985 -0
  161. pyogrio/gdal_data/ruian_vf_ob_v1.gfs +1455 -0
  162. pyogrio/gdal_data/ruian_vf_st_uvoh_v1.gfs +86 -0
  163. pyogrio/gdal_data/ruian_vf_st_v1.gfs +1489 -0
  164. pyogrio/gdal_data/ruian_vf_v1.gfs +2126 -0
  165. pyogrio/gdal_data/s57agencies.csv +249 -0
  166. pyogrio/gdal_data/s57attributes.csv +484 -0
  167. pyogrio/gdal_data/s57expectedinput.csv +1008 -0
  168. pyogrio/gdal_data/s57objectclasses.csv +287 -0
  169. pyogrio/gdal_data/seed_2d.dgn +0 -0
  170. pyogrio/gdal_data/seed_3d.dgn +0 -0
  171. pyogrio/gdal_data/stateplane.csv +259 -0
  172. pyogrio/gdal_data/template_tiles.mapml +28 -0
  173. pyogrio/gdal_data/tms_LINZAntarticaMapTileGrid.json +190 -0
  174. pyogrio/gdal_data/tms_MapML_APSTILE.json +268 -0
  175. pyogrio/gdal_data/tms_MapML_CBMTILE.json +346 -0
  176. pyogrio/gdal_data/tms_NZTM2000.json +243 -0
  177. pyogrio/gdal_data/trailer.dxf +434 -0
  178. pyogrio/gdal_data/usage +4 -0
  179. pyogrio/gdal_data/vcpkg-cmake-wrapper.cmake +23 -0
  180. pyogrio/gdal_data/vcpkg.spdx.json +264 -0
  181. pyogrio/gdal_data/vcpkg_abi_info.txt +41 -0
  182. pyogrio/gdal_data/vdv452.xml +367 -0
  183. pyogrio/gdal_data/vdv452.xsd +63 -0
  184. pyogrio/gdal_data/vicar.json +164 -0
  185. pyogrio/geopandas.py +675 -0
  186. pyogrio/proj_data/CH +22 -0
  187. pyogrio/proj_data/GL27 +23 -0
  188. pyogrio/proj_data/ITRF2000 +24 -0
  189. pyogrio/proj_data/ITRF2008 +94 -0
  190. pyogrio/proj_data/ITRF2014 +55 -0
  191. pyogrio/proj_data/copyright +34 -0
  192. pyogrio/proj_data/deformation_model.schema.json +582 -0
  193. pyogrio/proj_data/nad.lst +142 -0
  194. pyogrio/proj_data/nad27 +810 -0
  195. pyogrio/proj_data/nad83 +745 -0
  196. pyogrio/proj_data/other.extra +53 -0
  197. pyogrio/proj_data/proj-config-version.cmake +44 -0
  198. pyogrio/proj_data/proj-config.cmake +79 -0
  199. pyogrio/proj_data/proj-targets-release.cmake +19 -0
  200. pyogrio/proj_data/proj-targets.cmake +107 -0
  201. pyogrio/proj_data/proj.db +0 -0
  202. pyogrio/proj_data/proj.ini +51 -0
  203. pyogrio/proj_data/proj4-targets-release.cmake +19 -0
  204. pyogrio/proj_data/proj4-targets.cmake +107 -0
  205. pyogrio/proj_data/projjson.schema.json +1174 -0
  206. pyogrio/proj_data/triangulation.schema.json +214 -0
  207. pyogrio/proj_data/usage +4 -0
  208. pyogrio/proj_data/vcpkg.spdx.json +198 -0
  209. pyogrio/proj_data/vcpkg_abi_info.txt +27 -0
  210. pyogrio/proj_data/world +214 -0
  211. pyogrio/raw.py +871 -0
  212. pyogrio/tests/__init__.py +0 -0
  213. pyogrio/tests/conftest.py +204 -0
  214. pyogrio/tests/fixtures/README.md +89 -0
  215. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.cpg +1 -0
  216. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf +0 -0
  217. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.prj +1 -0
  218. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp +0 -0
  219. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx +0 -0
  220. pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
  221. pyogrio/tests/fixtures/sample.osm.pbf +0 -0
  222. pyogrio/tests/fixtures/test_datetime.geojson +7 -0
  223. pyogrio/tests/fixtures/test_datetime_tz.geojson +8 -0
  224. pyogrio/tests/fixtures/test_fgdb.gdb.zip +0 -0
  225. pyogrio/tests/fixtures/test_gpkg_nulls.gpkg +0 -0
  226. pyogrio/tests/fixtures/test_multisurface.gpkg +0 -0
  227. pyogrio/tests/fixtures/test_nested.geojson +18 -0
  228. pyogrio/tests/fixtures/test_ogr_types_list.geojson +12 -0
  229. pyogrio/tests/test_arrow.py +1041 -0
  230. pyogrio/tests/test_core.py +588 -0
  231. pyogrio/tests/test_geopandas_io.py +2174 -0
  232. pyogrio/tests/test_path.py +352 -0
  233. pyogrio/tests/test_raw_io.py +1404 -0
  234. pyogrio/util.py +223 -0
  235. pyogrio-0.9.0.dist-info/LICENSE +21 -0
  236. pyogrio-0.9.0.dist-info/METADATA +100 -0
  237. pyogrio-0.9.0.dist-info/RECORD +239 -0
  238. pyogrio-0.9.0.dist-info/WHEEL +5 -0
  239. pyogrio-0.9.0.dist-info/top_level.txt +1 -0
pyogrio/tests/test_raw_io.py
@@ -0,0 +1,1404 @@
import contextlib
import ctypes
from io import BytesIO
import json
import sys

import numpy as np
from numpy import array_equal
import pytest

import pyogrio
from pyogrio import (
    list_layers,
    list_drivers,
    read_info,
    set_gdal_config_options,
    get_gdal_config_option,
    __gdal_version__,
)
from pyogrio._compat import HAS_SHAPELY, HAS_PYARROW
from pyogrio.raw import read, write, open_arrow
from pyogrio.errors import DataSourceError, DataLayerError, FeatureError
from pyogrio.tests.conftest import (
    DRIVERS,
    DRIVER_EXT,
    prepare_testfile,
    requires_pyarrow_api,
    requires_arrow_api,
)

try:
    import shapely
except ImportError:
    pass


def test_read(naturalearth_lowres):
    meta, _, geometry, fields = read(naturalearth_lowres)

    assert meta["crs"] == "EPSG:4326"
    assert meta["geometry_type"] == "Polygon"
    assert meta["encoding"] == "UTF-8"
    assert meta["fields"].shape == (5,)

    assert meta["fields"].tolist() == [
        "pop_est",
        "continent",
        "name",
        "iso_a3",
        "gdp_md_est",
    ]

    assert len(fields) == 5
    assert len(geometry) == len(fields[0])

    # quick test that WKB is a MultiPolygon type
    assert geometry[0][:6] == b"\x01\x06\x00\x00\x00\x03"


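# A minimal sketch of what the 6-byte WKB prefix asserted above encodes,
# assuming the standard WKB layout (1-byte byte order, then a uint32
# geometry type in that byte order):
#
#     import struct
#     byte_order, geom_type = struct.unpack("<BI", geometry[0][:5])
#     # byte_order == 1 -> NDR (little-endian)
#     # geom_type == 6  -> MultiPolygon; the trailing b"\x03" is the first
#     # byte of the uint32 count of member polygons (3 here)

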
@pytest.mark.parametrize("ext", DRIVERS)
def test_read_autodetect_driver(tmp_path, naturalearth_lowres, ext):
    # Test all supported autodetect drivers
    testfile = prepare_testfile(naturalearth_lowres, dst_dir=tmp_path, ext=ext)

    assert testfile.suffix == ext
    assert testfile.exists()
    meta, _, geometry, fields = read(testfile)

    assert meta["crs"] == "EPSG:4326"
    assert meta["geometry_type"] in ("MultiPolygon", "Polygon", "Unknown")
    assert meta["encoding"] == "UTF-8"
    assert meta["fields"].shape == (5,)

    assert meta["fields"].tolist() == [
        "pop_est",
        "continent",
        "name",
        "iso_a3",
        "gdp_md_est",
    ]

    assert len(fields) == 5
    assert len(geometry) == len(fields[0])


def test_read_arrow_unspecified_layer_warning(data_dir):
    """Reading a multi-layer file without specifying a layer gives a warning."""
    with pytest.warns(UserWarning, match="More than one layer found "):
        read(data_dir / "sample.osm.pbf")


def test_read_invalid_layer(naturalearth_lowres):
    with pytest.raises(DataLayerError, match="Layer 'invalid' could not be opened"):
        read(naturalearth_lowres, layer="invalid")

    with pytest.raises(DataLayerError, match="Layer '-1' could not be opened"):
        read(naturalearth_lowres, layer=-1)

    with pytest.raises(DataLayerError, match="Layer '2' could not be opened"):
        read(naturalearth_lowres, layer=2)


def test_vsi_read_layers(naturalearth_lowres_vsi):
    _, naturalearth_lowres_vsi = naturalearth_lowres_vsi
    assert array_equal(
        list_layers(naturalearth_lowres_vsi), [["naturalearth_lowres", "Polygon"]]
    )

    geometry = read(naturalearth_lowres_vsi)[2]
    assert geometry.shape == (177,)


def test_read_no_geometry(naturalearth_lowres):
    geometry = read(naturalearth_lowres, read_geometry=False)[2]

    assert geometry is None


def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres):
    with pytest.raises(
        ValueError,
        match=(
            "at least one of read_geometry or return_fids must be True or columns must "
            "be None or non-empty"
        ),
    ):
        _ = read(
            naturalearth_lowres, columns=[], read_geometry=False, return_fids=False
        )


def test_read_columns(naturalearth_lowres):
    columns = ["NAME", "NAME_LONG"]
    meta, _, geometry, fields = read(
        naturalearth_lowres, columns=columns, read_geometry=False
    )
    assert array_equal(meta["fields"], columns)

    # Repeats should be dropped
    columns = ["NAME", "NAME_LONG", "NAME"]
    meta, _, geometry, fields = read(
        naturalearth_lowres, columns=columns, read_geometry=False
    )
    assert array_equal(meta["fields"], columns[:2])


@pytest.mark.parametrize("skip_features", [10, 200])
def test_read_skip_features(naturalearth_lowres_all_ext, skip_features):
    expected_geometry, expected_fields = read(naturalearth_lowres_all_ext)[2:]
    geometry, fields = read(naturalearth_lowres_all_ext, skip_features=skip_features)[
        2:
    ]

    # skipping more features than available in layer returns empty arrays
    expected_count = max(len(expected_geometry) - skip_features, 0)

    assert len(geometry) == expected_count
    assert len(fields[0]) == expected_count

    assert np.array_equal(geometry, expected_geometry[skip_features:])
    # Last field has more variable data
    assert np.array_equal(fields[-1], expected_fields[-1][skip_features:])


def test_read_negative_skip_features(naturalearth_lowres):
    with pytest.raises(ValueError, match="'skip_features' must be >= 0"):
        read(naturalearth_lowres, skip_features=-1)


def test_read_max_features(naturalearth_lowres):
    expected_geometry, expected_fields = read(naturalearth_lowres)[2:]
    geometry, fields = read(naturalearth_lowres, max_features=2)[2:]

    assert len(geometry) == 2
    assert len(fields[0]) == 2

    assert np.array_equal(geometry, expected_geometry[:2])
    assert np.array_equal(fields[-1], expected_fields[-1][:2])


def test_read_negative_max_features(naturalearth_lowres):
    with pytest.raises(ValueError, match="'max_features' must be >= 0"):
        read(naturalearth_lowres, max_features=-1)


def test_read_where(naturalearth_lowres):
    # empty filter should return full set of records
    geometry, fields = read(naturalearth_lowres, where="")[2:]
    assert len(geometry) == 177
    assert len(fields) == 5
    assert len(fields[0]) == 177

    # should return a single item
    geometry, fields = read(naturalearth_lowres, where="iso_a3 = 'CAN'")[2:]
    assert len(geometry) == 1
    assert len(fields) == 5
    assert len(fields[0]) == 1
    assert fields[3] == "CAN"

    # should return items within range
    geometry, fields = read(
        naturalearth_lowres, where="POP_EST >= 10000000 AND POP_EST < 100000000"
    )[2:]
    assert len(geometry) == 75
    assert min(fields[0]) >= 10000000
    assert max(fields[0]) < 100000000

    # should match no items
    geometry, fields = read(naturalearth_lowres, where="iso_a3 = 'INVALID'")[2:]
    assert len(geometry) == 0


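# The indexing used throughout this module leans on read() returning a
# 4-tuple; a minimal sketch of the contract these tests assume:
#
#     meta, fids, geometry, field_data = read(path)
#     geometry, field_data = read(path)[2:]  # drop meta and fids
#     field_data = read(path)[3]             # field data only

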
def test_read_where_invalid(naturalearth_lowres):
    with pytest.raises(ValueError, match="Invalid SQL"):
        read(naturalearth_lowres, where="invalid")


@pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
def test_read_bbox_invalid(naturalearth_lowres, bbox):
    with pytest.raises(ValueError, match="Invalid bbox"):
        read(naturalearth_lowres, bbox=bbox)


def test_read_bbox(naturalearth_lowres_all_ext):
    # should return no features
    geometry, fields = read(naturalearth_lowres_all_ext, bbox=(0, 0, 0.00001, 0.00001))[
        2:
    ]

    assert len(geometry) == 0

    geometry, fields = read(naturalearth_lowres_all_ext, bbox=(-85, 8, -80, 10))[2:]

    assert len(geometry) == 2
    assert np.array_equal(fields[3], ["PAN", "CRI"])


def test_read_bbox_sql(naturalearth_lowres_all_ext):
    fields = read(
        naturalearth_lowres_all_ext,
        bbox=(-180, 50, -100, 90),
        sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
    )[3]
    assert len(fields[3]) == 1
    assert np.array_equal(fields[3], ["CAN"])


def test_read_bbox_where(naturalearth_lowres_all_ext):
    fields = read(
        naturalearth_lowres_all_ext,
        bbox=(-180, 50, -100, 90),
        where="iso_a3 not in ('USA', 'RUS')",
    )[3]
    assert len(fields[3]) == 1
    assert np.array_equal(fields[3], ["CAN"])


@pytest.mark.skipif(
    not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
@pytest.mark.parametrize(
    "mask",
    [
        {"type": "Point", "coordinates": [0, 0]},
        '{"type": "Point", "coordinates": [0, 0]}',
        "invalid",
    ],
)
def test_read_mask_invalid(naturalearth_lowres, mask):
    with pytest.raises(ValueError, match="'mask' parameter must be a Shapely geometry"):
        read(naturalearth_lowres, mask=mask)


@pytest.mark.skipif(
    not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
def test_read_bbox_mask_invalid(naturalearth_lowres):
    with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
        read(naturalearth_lowres, bbox=(-85, 8, -80, 10), mask=shapely.Point(-105, 55))


@pytest.mark.skipif(
    not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
@pytest.mark.parametrize(
    "mask,expected",
    [
        ("POINT (-105 55)", ["CAN"]),
        ("POLYGON ((-80 8, -80 10, -85 10, -85 8, -80 8))", ["PAN", "CRI"]),
        (
            """POLYGON ((
                6.101929 50.97085,
                5.773002 50.906611,
                5.593156 50.642649,
                6.059271 50.686052,
                6.374064 50.851481,
                6.101929 50.97085
            ))""",
            ["DEU", "BEL", "NLD"],
        ),
        (
            """GEOMETRYCOLLECTION (
                POINT (-7.7 53),
                POLYGON ((-80 8, -80 10, -85 10, -85 8, -80 8))
            )""",
            ["PAN", "CRI", "IRL"],
        ),
    ],
)
def test_read_mask(naturalearth_lowres_all_ext, mask, expected):
    mask = shapely.from_wkt(mask)

    geometry, fields = read(naturalearth_lowres_all_ext, mask=mask)[2:]

    assert np.array_equal(fields[3], expected)
    assert len(geometry) == len(expected)


@pytest.mark.skipif(
    not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
def test_read_mask_sql(naturalearth_lowres_all_ext):
    fields = read(
        naturalearth_lowres_all_ext,
        mask=shapely.box(-180, 50, -100, 90),
        sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
    )[3]
    assert len(fields[3]) == 1
    assert np.array_equal(fields[3], ["CAN"])


@pytest.mark.skipif(
    not HAS_SHAPELY, reason="Shapely is required for mask functionality"
)
def test_read_mask_where(naturalearth_lowres_all_ext):
    fields = read(
        naturalearth_lowres_all_ext,
        mask=shapely.box(-180, 50, -100, 90),
        where="iso_a3 not in ('USA', 'RUS')",
    )[3]
    assert len(fields[3]) == 1
    assert np.array_equal(fields[3], ["CAN"])


def test_read_fids(naturalearth_lowres):
    expected_fids, expected_geometry, expected_fields = read(
        naturalearth_lowres, return_fids=True
    )[1:]
    subset = [0, 10, 5]

    # both a plain list and an ndarray of fids should be accepted
    for fids in [subset, np.array(subset)]:
        index, geometry, fields = read(
            naturalearth_lowres, fids=fids, return_fids=True
        )[1:]

        assert len(fids) == 3
        assert len(geometry) == 3
        assert len(fields[0]) == 3

        assert np.array_equal(index, expected_fids[subset])
        assert np.array_equal(geometry, expected_geometry[subset])
        assert np.array_equal(fields[-1], expected_fields[-1][subset])


def test_read_fids_out_of_bounds(naturalearth_lowres):
    with pytest.raises(
        FeatureError,
        match=r"Attempt to read shape with feature id \(-1\) out of available range",
    ):
        read(naturalearth_lowres, fids=[-1])

    with pytest.raises(
        FeatureError,
        match=r"Attempt to read shape with feature id \(200\) out of available range",
    ):
        read(naturalearth_lowres, fids=[200])


def test_read_fids_unsupported_keywords(naturalearth_lowres):
    with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
        read(naturalearth_lowres, fids=[1], where="iso_a3 = 'CAN'")

    with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
        read(naturalearth_lowres, fids=[1], bbox=(-140, 20, -100, 45))

    with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
        read(naturalearth_lowres, fids=[1], skip_features=5)

    with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
        read(naturalearth_lowres, fids=[1], max_features=5)

    with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
        read(naturalearth_lowres, fids=[1], bbox=(0, 0, 0.0001, 0.0001))

    if HAS_SHAPELY:
        with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
            read(naturalearth_lowres, fids=[1], mask=shapely.Point(0, 0))


def test_read_return_fids(naturalearth_lowres):
    # default is to not return fids
    fids = read(naturalearth_lowres)[1]
    assert fids is None

    fids = read(naturalearth_lowres, return_fids=False)[1]
    assert fids is None

    fids = read(naturalearth_lowres, return_fids=True, skip_features=2, max_features=2)[
        1
    ]
    assert fids is not None
    assert fids.dtype == np.int64
    # Note: shapefile FIDs start at 0
    assert np.array_equal(fids, np.array([2, 3], dtype="int64"))


def test_read_return_only_fids(naturalearth_lowres):
    _, fids, geometry, field_data = read(
        naturalearth_lowres, columns=[], read_geometry=False, return_fids=True
    )
    assert fids is not None
    assert len(fids) == 177
    assert geometry is None
    assert len(field_data) == 0


428
+ def test_write_shp(tmp_path, naturalearth_lowres, encoding):
429
+ meta, _, geometry, field_data = read(naturalearth_lowres)
430
+
431
+ filename = tmp_path / "test.shp"
432
+ meta["encoding"] = encoding
433
+ write(filename, geometry, field_data, **meta)
434
+
435
+ assert filename.exists()
436
+ for ext in (".dbf", ".prj"):
437
+ assert filename.with_suffix(ext).exists()
438
+
439
+ # We write shapefiles in UTF-8 by default on all platforms
440
+ expected_encoding = encoding if encoding is not None else "UTF-8"
441
+ with open(filename.with_suffix(".cpg")) as cpg_file:
442
+ result_encoding = cpg_file.read()
443
+ assert result_encoding == expected_encoding
444
+
445
+
446
+ def test_write_gpkg(tmp_path, naturalearth_lowres):
447
+ meta, _, geometry, field_data = read(naturalearth_lowres)
448
+ meta.update({"geometry_type": "MultiPolygon"})
449
+
450
+ filename = tmp_path / "test.gpkg"
451
+ write(filename, geometry, field_data, driver="GPKG", **meta)
452
+
453
+ assert filename.exists()
454
+
455
+
456
+ def test_write_gpkg_multiple_layers(tmp_path, naturalearth_lowres):
457
+ meta, _, geometry, field_data = read(naturalearth_lowres)
458
+ meta["geometry_type"] = "MultiPolygon"
459
+
460
+ filename = tmp_path / "test.gpkg"
461
+ write(filename, geometry, field_data, driver="GPKG", layer="first", **meta)
462
+
463
+ assert filename.exists()
464
+
465
+ assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])
466
+
467
+ write(filename, geometry, field_data, driver="GPKG", layer="second", **meta)
468
+
469
+ assert np.array_equal(
470
+ list_layers(filename), [["first", "MultiPolygon"], ["second", "MultiPolygon"]]
471
+ )
472
+
473
+
474
+ def test_write_geojson(tmp_path, naturalearth_lowres):
475
+ meta, _, geometry, field_data = read(naturalearth_lowres)
476
+
477
+ filename = tmp_path / "test.json"
478
+ write(filename, geometry, field_data, driver="GeoJSON", **meta)
479
+
480
+ assert filename.exists()
481
+
482
+ data = json.loads(open(filename).read())
483
+
484
+ assert data["type"] == "FeatureCollection"
485
+ assert data["name"] == "test"
486
+ assert "crs" in data
487
+ assert len(data["features"]) == len(geometry)
488
+ assert not len(
489
+ set(meta["fields"]).difference(data["features"][0]["properties"].keys())
490
+ )
491
+
492
+
493
+ def test_write_no_fields(tmp_path, naturalearth_lowres):
494
+ """Test writing file with no fields/attribute columns."""
495
+ # Prepare test data
496
+ meta, _, geometry, field_data = read(naturalearth_lowres)
497
+ field_data = None
498
+ meta["fields"] = None
499
+ # naturalearth_lowres actually contains MultiPolygons. A shapefile doesn't make the
500
+ # distinction, so the metadata just reports Polygon. GPKG does, so override here to
501
+ # avoid GDAL warnings.
502
+ meta["geometry_type"] = "MultiPolygon"
503
+
504
+ # Test
505
+ filename = tmp_path / "test.gpkg"
506
+ write(filename, geometry, field_data, driver="GPKG", **meta)
507
+
508
+ # Check result
509
+ assert filename.exists()
510
+ meta, _, geometry, fields = read(filename)
511
+
512
+ assert meta["crs"] == "EPSG:4326"
513
+ assert meta["geometry_type"] == "MultiPolygon"
514
+ assert meta["encoding"] == "UTF-8"
515
+ assert meta["fields"].shape == (0,)
516
+ assert len(fields) == 0
517
+ assert len(geometry) == 177
518
+
519
+ # quick test that WKB is a Polygon type
520
+ assert geometry[0][:6] == b"\x01\x06\x00\x00\x00\x03"
521
+
522
+
523
+ def test_write_no_geom(tmp_path, naturalearth_lowres):
524
+ """Test writing file with no geometry column."""
525
+ # Prepare test data
526
+ meta, _, geometry, field_data = read(naturalearth_lowres)
527
+ geometry = None
528
+ meta["geometry_type"] = None
529
+
530
+ # Test
531
+ filename = tmp_path / "test.gpkg"
532
+ write(filename, geometry, field_data, driver="GPKG", **meta)
533
+
534
+ # Check result
535
+ assert filename.exists()
536
+ meta, _, geometry, fields = read(filename)
537
+
538
+ assert meta["crs"] is None
539
+ assert meta["geometry_type"] is None
540
+ assert meta["encoding"] == "UTF-8"
541
+ assert meta["fields"].shape == (5,)
542
+
543
+ assert meta["fields"].tolist() == [
544
+ "pop_est",
545
+ "continent",
546
+ "name",
547
+ "iso_a3",
548
+ "gdp_md_est",
549
+ ]
550
+
551
+ assert len(fields) == 5
552
+ assert len(fields[0]) == 177
553
+
554
+
555
+ def test_write_no_geom_data(tmp_path, naturalearth_lowres):
556
+ """Test writing file with no geometry data passed but a geometry_type specified.
557
+
558
+ In this case the geometry_type is ignored, so a file without geometry column is
559
+ written.
560
+ """
561
+ # Prepare test data
562
+ meta, _, geometry, field_data = read(naturalearth_lowres)
563
+ # If geometry data is set to None, meta["geometry_type"] is ignored and so no
564
+ # geometry column will be created.
565
+ geometry = None
566
+
567
+ # Test
568
+ filename = tmp_path / "test.gpkg"
569
+ write(filename, geometry, field_data, driver="GPKG", **meta)
570
+
571
+ # Check result
572
+ assert filename.exists()
573
+ result_meta, _, result_geometry, result_field_data = read(filename)
574
+
575
+ assert result_meta["crs"] is None
576
+ assert result_meta["geometry_type"] is None
577
+ assert result_meta["encoding"] == "UTF-8"
578
+ assert result_meta["fields"].shape == (5,)
579
+
580
+ assert result_meta["fields"].tolist() == [
581
+ "pop_est",
582
+ "continent",
583
+ "name",
584
+ "iso_a3",
585
+ "gdp_md_est",
586
+ ]
587
+
588
+ assert len(result_field_data) == 5
589
+ assert len(result_field_data[0]) == 177
590
+ assert result_geometry is None
591
+
592
+
593
+ def test_write_no_geom_no_fields():
594
+ """Test writing file with no geometry column nor fields -> error."""
595
+ with pytest.raises(
596
+ ValueError,
597
+ match="You must provide at least a geometry column or a field",
598
+ ):
599
+ write("test.gpkg", geometry=None, field_data=None, fields=None)
600
+
601
+
602
+ @pytest.mark.skipif(
603
+ __gdal_version__ < (3, 6, 0),
604
+ reason="OpenFileGDB write support only available for GDAL >= 3.6.0",
605
+ )
606
+ def test_write_openfilegdb(tmp_path, naturalearth_lowres):
607
+ meta, _, geometry, field_data = read(naturalearth_lowres)
608
+
609
+ filename = tmp_path / "test.gdb"
610
+ write(filename, geometry, field_data, driver="OpenFileGDB", **meta)
611
+
612
+ assert filename.exists()
613
+
614
+
615
+ @pytest.mark.parametrize("ext", DRIVERS)
616
+ def test_write_append(tmp_path, naturalearth_lowres, ext):
617
+ if ext == ".fgb" and __gdal_version__ <= (3, 5, 0):
618
+ pytest.skip("Append to FlatGeobuf fails for GDAL <= 3.5.0")
619
+
620
+ if ext in (".geojsonl", ".geojsons") and __gdal_version__ < (3, 6, 0):
621
+ pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")
622
+
623
+ meta, _, geometry, field_data = read(naturalearth_lowres)
624
+
625
+ # coerce output layer to MultiPolygon to avoid mixed type errors
626
+ meta["geometry_type"] = "MultiPolygon"
627
+
628
+ filename = tmp_path / f"test{ext}"
629
+ write(filename, geometry, field_data, **meta)
630
+
631
+ assert filename.exists()
632
+
633
+ assert read_info(filename)["features"] == 177
634
+
635
+ # write the same records again
636
+ write(filename, geometry, field_data, append=True, **meta)
637
+
638
+ assert read_info(filename)["features"] == 354
639
+
640
+
641
+ @pytest.mark.parametrize("driver,ext", [("GML", ".gml"), ("GeoJSONSeq", ".geojsons")])
642
+ def test_write_append_unsupported(tmp_path, naturalearth_lowres, driver, ext):
643
+ if ext == ".geojsons" and __gdal_version__ >= (3, 6, 0):
644
+ pytest.skip("Append to GeoJSONSeq supported for GDAL >= 3.6.0")
645
+
646
+ meta, _, geometry, field_data = read(naturalearth_lowres)
647
+
648
+ # GML does not support append functionality
649
+ filename = tmp_path / f"test{ext}"
650
+ write(filename, geometry, field_data, driver=driver, **meta)
651
+
652
+ assert filename.exists()
653
+
654
+ assert read_info(filename, force_feature_count=True)["features"] == 177
655
+
656
+ with pytest.raises(DataSourceError):
657
+ write(filename, geometry, field_data, driver=driver, append=True, **meta)
658
+
659
+
660
+ @pytest.mark.skipif(
661
+ __gdal_version__ > (3, 5, 0),
662
+ reason="segfaults on FlatGeobuf limited to GDAL <= 3.5.0",
663
+ )
664
+ def test_write_append_prevent_gdal_segfault(tmp_path, naturalearth_lowres):
665
+ """GDAL <= 3.5.0 segfaults when appending to FlatGeobuf; this test
666
+ verifies that we catch that before segfault"""
667
+ meta, _, geometry, field_data = read(naturalearth_lowres)
668
+ meta["geometry_type"] = "MultiPolygon"
669
+
670
+ filename = tmp_path / "test.fgb"
671
+ write(filename, geometry, field_data, **meta)
672
+
673
+ assert filename.exists()
674
+
675
+ with pytest.raises(
676
+ RuntimeError, # match="append to FlatGeobuf is not supported for GDAL <= 3.5.0"
677
+ ):
678
+ write(filename, geometry, field_data, append=True, **meta)
679
+
680
+
681
+ @pytest.mark.parametrize(
682
+ "driver",
683
+ {
684
+ driver
685
+ for driver in DRIVERS.values()
686
+ if driver not in ("ESRI Shapefile", "GPKG", "GeoJSON")
687
+ },
688
+ )
689
+ def test_write_supported(tmp_path, naturalearth_lowres, driver):
690
+ """Test drivers known to work that are not specifically tested above"""
691
+ meta, _, geometry, field_data = read(naturalearth_lowres, columns=["iso_a3"])
692
+
693
+ # note: naturalearth_lowres contains mixed polygons / multipolygons, which
694
+ # are not supported in mixed form for all drivers. To get around this here
695
+ # we take the first record only.
696
+ meta["geometry_type"] = "MultiPolygon"
697
+
698
+ filename = tmp_path / f"test{DRIVER_EXT[driver]}"
699
+ write(
700
+ filename,
701
+ geometry[:1],
702
+ field_data=[f[:1] for f in field_data],
703
+ driver=driver,
704
+ **meta,
705
+ )
706
+
707
+ assert filename.exists()
708
+
709
+
710
+ @pytest.mark.skipif(
711
+ __gdal_version__ >= (3, 6, 0), reason="OpenFileGDB supports write for GDAL >= 3.6.0"
712
+ )
713
+ def test_write_unsupported(tmp_path, naturalearth_lowres):
714
+ meta, _, geometry, field_data = read(naturalearth_lowres)
715
+
716
+ filename = tmp_path / "test.gdb"
717
+
718
+ with pytest.raises(DataSourceError, match="does not support write functionality"):
719
+ write(filename, geometry, field_data, driver="OpenFileGDB", **meta)
720
+
721
+
722
+ def test_write_gdalclose_error(naturalearth_lowres):
723
+ meta, _, geometry, field_data = read(naturalearth_lowres)
724
+
725
+ filename = "s3://non-existing-bucket/test.geojson"
726
+
727
+ # set config options to avoid errors on open due to GDAL S3 configuration
728
+ set_gdal_config_options(
729
+ {
730
+ "AWS_ACCESS_KEY_ID": "invalid",
731
+ "AWS_SECRET_ACCESS_KEY": "invalid",
732
+ "AWS_NO_SIGN_REQUEST": True,
733
+ }
734
+ )
735
+
736
+ with pytest.raises(DataSourceError, match="Failed to write features to dataset"):
737
+ write(filename, geometry, field_data, **meta)
738
+
739
+
740
+ def assert_equal_result(result1, result2):
741
+ meta1, index1, geometry1, field_data1 = result1
742
+ meta2, index2, geometry2, field_data2 = result2
743
+
744
+ assert np.array_equal(meta1["fields"], meta2["fields"])
745
+ assert np.array_equal(index1, index2)
746
+ assert all([np.array_equal(f1, f2) for f1, f2 in zip(field_data1, field_data2)])
747
+
748
+ if HAS_SHAPELY:
749
+ # a plain `assert np.array_equal(geometry1, geometry2)` doesn't work
750
+ # because the WKB values are not exactly equal, therefore parsing with
751
+ # shapely to compare with tolerance
752
+ assert shapely.equals_exact(
753
+ shapely.from_wkb(geometry1), shapely.from_wkb(geometry2), tolerance=0.00001
754
+ ).all()
755
+
756
+
757
+ @pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning") # TODO
758
+ @pytest.mark.parametrize("driver,ext", [("GeoJSON", "geojson"), ("GPKG", "gpkg")])
759
+ def test_read_from_bytes(tmp_path, naturalearth_lowres, driver, ext):
760
+ meta, index, geometry, field_data = read(naturalearth_lowres)
761
+ meta.update({"geometry_type": "Unknown"})
762
+ filename = tmp_path / f"test.{ext}"
763
+ write(filename, geometry, field_data, driver=driver, **meta)
764
+
765
+ with open(filename, "rb") as f:
766
+ buffer = f.read()
767
+
768
+ result2 = read(buffer)
769
+ assert_equal_result((meta, index, geometry, field_data), result2)
770
+
771
+
772
+ def test_read_from_bytes_zipped(naturalearth_lowres_vsi):
773
+ path, vsi_path = naturalearth_lowres_vsi
774
+ meta, index, geometry, field_data = read(vsi_path)
775
+
776
+ with open(path, "rb") as f:
777
+ buffer = f.read()
778
+
779
+ result2 = read(buffer)
780
+ assert_equal_result((meta, index, geometry, field_data), result2)
781
+
782
+
783
+ @pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning") # TODO
784
+ @pytest.mark.parametrize("driver,ext", [("GeoJSON", "geojson"), ("GPKG", "gpkg")])
785
+ def test_read_from_file_like(tmp_path, naturalearth_lowres, driver, ext):
786
+ meta, index, geometry, field_data = read(naturalearth_lowres)
787
+ meta.update({"geometry_type": "Unknown"})
788
+ filename = tmp_path / f"test.{ext}"
789
+ write(filename, geometry, field_data, driver=driver, **meta)
790
+
791
+ with open(filename, "rb") as f:
792
+ result2 = read(f)
793
+
794
+ assert_equal_result((meta, index, geometry, field_data), result2)
795
+
796
+
797
+ @pytest.mark.parametrize("ext", ["gpkg", "fgb"])
798
+ def test_read_write_data_types_numeric(tmp_path, ext):
799
+ # Point(0, 0)
800
+ geometry = np.array(
801
+ [bytes.fromhex("010100000000000000000000000000000000000000")] * 3, dtype=object
802
+ )
803
+ field_data = [
804
+ np.array([True, False, True], dtype="bool"),
805
+ np.array([1, 2, 3], dtype="int16"),
806
+ np.array([1, 2, 3], dtype="int32"),
807
+ np.array([1, 2, 3], dtype="int64"),
808
+ np.array([1, 2, 3], dtype="float32"),
809
+ np.array([1, 2, 3], dtype="float64"),
810
+ ]
811
+ fields = ["bool", "int16", "int32", "int64", "float32", "float64"]
812
+ meta = dict(geometry_type="Point", crs="EPSG:4326", spatial_index=False)
813
+
814
+ filename = tmp_path / f"test.{ext}"
815
+ write(filename, geometry, field_data, fields, **meta)
816
+ result = read(filename)[3]
817
+ assert all([np.array_equal(f1, f2) for f1, f2 in zip(result, field_data)])
818
+ assert all([f1.dtype == f2.dtype for f1, f2 in zip(result, field_data)])
819
+
820
+ # other integer data types that don't roundtrip exactly
821
+ # these are generally promoted to a larger integer type except for uint64
822
+ for i, (dtype, result_dtype) in enumerate(
823
+ [
824
+ ("int8", "int16"),
825
+ ("uint8", "int16"),
826
+ ("uint16", "int32"),
827
+ ("uint32", "int64"),
828
+ ("uint64", "int64"),
829
+ ]
830
+ ):
831
+ field_data = [np.array([1, 2, 3], dtype=dtype)]
832
+ filename = tmp_path / f"test{i}.{ext}"
833
+ write(filename, geometry, field_data, ["col"], **meta)
834
+ result = read(filename)[3][0]
835
+ assert np.array_equal(result, np.array([1, 2, 3]))
836
+ assert result.dtype == result_dtype
837
+
838
+
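# A reading of the promotion table above, assuming OGR's field model: OGR has
# no unsigned integer types and its smallest integer field is the Int16
# subtype of OFTInteger, so int8/uint8 land in int16, uint16 in int32,
# uint32 in int64, and uint64 can only map to int64 (values above the int64
# maximum would not survive this roundtrip).

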
def test_read_write_datetime(tmp_path):
    field_data = [
        np.array(["2005-02-01", "2005-02-02"], dtype="datetime64[D]"),
        np.array(["2001-01-01T12:00", "2002-02-03T13:56:03"], dtype="datetime64[s]"),
        np.array(
            ["2001-01-01T12:00", "2002-02-03T13:56:03.072"], dtype="datetime64[ms]"
        ),
        np.array(
            ["2001-01-01T12:00", "2002-02-03T13:56:03.072"], dtype="datetime64[ns]"
        ),
        np.array(
            ["2001-01-01T12:00", "2002-02-03T13:56:03.072123456"],
            dtype="datetime64[ns]",
        ),
        # Remark: a None value is automatically converted to np.datetime64("NaT")
        np.array([np.datetime64("NaT"), None], dtype="datetime64[ms]"),
    ]
    fields = [
        "datetime64_d",
        "datetime64_s",
        "datetime64_ms",
        "datetime64_ns",
        "datetime64_precise_ns",
        "datetime64_ms_nat",
    ]

    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 2, dtype=object
    )
    meta = dict(geometry_type="Point", crs="EPSG:4326", spatial_index=False)

    filename = tmp_path / "test.gpkg"
    write(filename, geometry, field_data, fields, **meta)
    result = read(filename)[3]
    for idx, field in enumerate(fields):
        if field == "datetime64_precise_ns":
            # gdal rounds datetimes to ms
            assert np.array_equal(result[idx], field_data[idx].astype("datetime64[ms]"))
        else:
            assert np.array_equal(result[idx], field_data[idx], equal_nan=True)


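# Concretely, per the astype("datetime64[ms]") comparison above, the
# sub-millisecond value roundtrips like this:
#
#     written:   np.datetime64("2002-02-03T13:56:03.072123456")
#     read back: np.datetime64("2002-02-03T13:56:03.072")

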
@pytest.mark.parametrize("ext", ["gpkg", "fgb"])
def test_read_write_int64_large(tmp_path, ext):
    # Test if value > max int32 is correctly written and read.
    # Test introduced to validate https://github.com/geopandas/pyogrio/issues/259
    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 3, dtype=object
    )
    field_data = [np.array([1, 2192502720, -5], dtype="int64")]
    fields = ["overflow_int64"]
    meta = dict(geometry_type="Point", crs="EPSG:4326", spatial_index=False)

    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, fields, **meta)
    result = read(filename)[3]
    assert np.array_equal(result, field_data)
    assert result[0].dtype == field_data[0].dtype


def test_read_data_types_numeric_with_null(test_gpkg_nulls):
    fields = read(test_gpkg_nulls)[3]

    for i, field in enumerate(fields):
        # last value should be np.nan
        assert np.isnan(field[-1])

        # all integer fields should be cast to float64; float32 should be preserved
        if i == 9:
            assert field.dtype == "float32"
        else:
            assert field.dtype == "float64"


def test_read_unsupported_types(test_ogr_types_list):
    fields = read(test_ogr_types_list)[3]
    # list field gets skipped, only integer field is read
    assert len(fields) == 1

    fields = read(test_ogr_types_list, columns=["int64"])[3]
    assert len(fields) == 1


def test_read_datetime_millisecond(test_datetime):
    field = read(test_datetime)[3][0]
    assert field.dtype == "datetime64[ms]"
    assert field[0] == np.datetime64("2020-01-01 09:00:00.123")
    assert field[1] == np.datetime64("2020-01-01 10:00:00.000")


def test_read_unsupported_ext(tmp_path):
    test_unsupported_path = tmp_path / "test.unsupported"
    with open(test_unsupported_path, "w") as file:
        file.write("column1,column2\n")
        file.write("data1,data2")

    with pytest.raises(
        DataSourceError, match=".* by prefixing the file path with '<DRIVER>:'.*"
    ):
        read(test_unsupported_path)


def test_read_unsupported_ext_with_prefix(tmp_path):
    test_unsupported_path = tmp_path / "test.unsupported"
    with open(test_unsupported_path, "w") as file:
        file.write("column1,column2\n")
        file.write("data1,data2")

    _, _, _, field_data = read(f"CSV:{test_unsupported_path}")
    assert len(field_data) == 2
    assert field_data[0] == "data1"


def test_read_datetime_as_string(test_datetime_tz):
    field = read(test_datetime_tz)[3][0]
    assert field.dtype == "datetime64[ms]"
    # timezone is ignored in numpy layer
    assert field[0] == np.datetime64("2020-01-01 09:00:00.123")
    assert field[1] == np.datetime64("2020-01-01 10:00:00.000")
    field = read(test_datetime_tz, datetime_as_string=True)[3][0]
    assert field.dtype == "object"
    # GDAL doesn't return strings in ISO format (yet)
    assert field[0] == "2020/01/01 09:00:00.123-05"
    assert field[1] == "2020/01/01 10:00:00-05"


@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
def test_read_write_null_geometry(tmp_path, ext):
    # Point(0, 0), null
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000"), None],
        dtype=object,
    )
    field_data = [np.array([1, 2], dtype="int32")]
    fields = ["col"]
    meta = dict(geometry_type="Point", crs="EPSG:4326")
    if ext == "gpkg":
        meta["spatial_index"] = False

    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, fields, **meta)
    result_geometry, result_fields = read(filename)[2:]
    assert np.array_equal(result_geometry, geometry)
    assert np.array_equal(result_fields[0], field_data[0])


@pytest.mark.parametrize("dtype", ["float32", "float64"])
def test_write_float_nan_null(tmp_path, dtype):
    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 2,
        dtype=object,
    )
    field_data = [np.array([1.5, np.nan], dtype=dtype)]
    fields = ["col"]
    meta = dict(geometry_type="Point", crs="EPSG:4326")
    filename = tmp_path / "test.geojson"

    # default nan_as_null=True
    write(filename, geometry, field_data, fields, **meta)
    with open(filename, "r") as f:
        content = f.read()
    assert '{ "col": null }' in content

    # set to False
    # by default, GDAL will skip the property for GeoJSON if the value is NaN
    if dtype == "float32":
        ctx = pytest.warns(RuntimeWarning, match="NaN of Infinity value found. Skipped")
    else:
        ctx = contextlib.nullcontext()
    with ctx:
        write(filename, geometry, field_data, fields, **meta, nan_as_null=False)
    with open(filename, "r") as f:
        content = f.read()
    assert '"properties": { }' in content

    # but can instruct GDAL to write NaN to json
    write(
        filename,
        geometry,
        field_data,
        fields,
        **meta,
        nan_as_null=False,
        WRITE_NON_FINITE_VALUES="YES",
    )
    with open(filename, "r") as f:
        content = f.read()
    assert '{ "col": NaN }' in content


@requires_pyarrow_api
@pytest.mark.skipif(
    "Arrow" not in list_drivers(), reason="Arrow driver is not available"
)
def test_write_float_nan_null_arrow(tmp_path):
    import pyarrow.feather

    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 2,
        dtype=object,
    )
    field_data = [np.array([1.5, np.nan], dtype="float64")]
    fields = ["col"]
    meta = dict(geometry_type="Point", crs="EPSG:4326")
    fname = tmp_path / "test.arrow"

    # default nan_as_null=True
    write(fname, geometry, field_data, fields, driver="Arrow", **meta)
    table = pyarrow.feather.read_table(fname)
    assert table["col"].is_null().to_pylist() == [False, True]

    # set to False
    write(
        fname, geometry, field_data, fields, driver="Arrow", nan_as_null=False, **meta
    )
    table = pyarrow.feather.read_table(fname)
    assert table["col"].is_null().to_pylist() == [False, False]
    pc = pytest.importorskip("pyarrow.compute")
    assert pc.is_nan(table["col"]).to_pylist() == [False, True]


@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory(naturalearth_lowres, driver):
    meta, _, geometry, field_data = read(naturalearth_lowres)
    meta.update({"geometry_type": "MultiPolygon"})

    buffer = BytesIO()
    write(buffer, geometry, field_data, driver=driver, layer="test", **meta)

    assert len(buffer.getbuffer()) > 0
    assert list_layers(buffer)[0][0] == "test"

    actual_meta, _, actual_geometry, actual_field_data = read(buffer)

    assert np.array_equal(actual_meta["fields"], meta["fields"])
    assert np.array_equal(actual_field_data, field_data)
    assert len(actual_geometry) == len(geometry)


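# The /vsimem warning filter above hints at the mechanism: writing to a
# BytesIO appears to be backed by a temporary file in GDAL's in-memory
# /vsimem virtual filesystem, which is presumably also why a driver must be
# given explicitly below (there is no file extension to autodetect it from).

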
def test_write_memory_driver_required(naturalearth_lowres):
    meta, _, geometry, field_data = read(naturalearth_lowres)

    buffer = BytesIO()
    with pytest.raises(
        ValueError,
        match="driver must be provided to write to in-memory file",
    ):
        write(buffer, geometry, field_data, driver=None, layer="test", **meta)


@pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
    if driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
        pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")

    meta, _, geometry, field_data = read(naturalearth_lowres)

    buffer = BytesIO()

    with pytest.raises(
        ValueError, match=f"writing to in-memory file is not supported for {driver}"
    ):
        write(
            buffer,
            geometry,
            field_data,
            driver=driver,
            layer="test",
            append=True,
            **meta,
        )


@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory_append_unsupported(naturalearth_lowres, driver):
    meta, _, geometry, field_data = read(naturalearth_lowres)
    meta.update({"geometry_type": "MultiPolygon"})

    buffer = BytesIO()

    with pytest.raises(
        NotImplementedError, match="append is not supported for in-memory files"
    ):
        write(
            buffer,
            geometry,
            field_data,
            driver=driver,
            layer="test",
            append=True,
            **meta,
        )


def test_write_memory_existing_unsupported(naturalearth_lowres):
    meta, _, geometry, field_data = read(naturalearth_lowres)

    buffer = BytesIO(b"0000")
    with pytest.raises(
        NotImplementedError,
        match="writing to existing in-memory object is not supported",
    ):
        write(buffer, geometry, field_data, driver="GeoJSON", layer="test", **meta)


@pytest.mark.parametrize("ext", ["fgb", "gpkg", "geojson"])
@pytest.mark.parametrize(
    "read_encoding,write_encoding",
    [
        pytest.param(
            None,
            None,
            marks=pytest.mark.skipif(
                sys.platform == "win32", reason="must specify write encoding on Windows"
            ),
        ),
        pytest.param(
            "UTF-8",
            None,
            marks=pytest.mark.skipif(
                sys.platform == "win32", reason="must specify write encoding on Windows"
            ),
        ),
        (None, "UTF-8"),
        ("UTF-8", "UTF-8"),
    ],
)
def test_encoding_io(tmp_path, ext, read_encoding, write_encoding):
    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
    )
    arabic = "العربية"
    cree = "ᓀᐦᐃᔭᐍᐏᐣ"
    mandarin = "中文"
    field_data = [
        np.array([arabic], dtype=object),
        np.array([cree], dtype=object),
        np.array([mandarin], dtype=object),
    ]
    fields = [arabic, cree, mandarin]
    meta = dict(geometry_type="Point", crs="EPSG:4326", encoding=write_encoding)

    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, fields, **meta)

    actual_meta, _, _, actual_field_data = read(filename, encoding=read_encoding)
    assert np.array_equal(fields, actual_meta["fields"])
    assert np.array_equal(field_data, actual_field_data)
    assert np.array_equal(fields, read_info(filename, encoding=read_encoding)["fields"])


@pytest.mark.parametrize(
    "read_encoding,write_encoding",
    [
        pytest.param(
            None,
            None,
            marks=pytest.mark.skipif(
                sys.platform == "win32", reason="must specify write encoding on Windows"
            ),
        ),
        pytest.param(
            "UTF-8",
            None,
            marks=pytest.mark.skipif(
                sys.platform == "win32", reason="must specify write encoding on Windows"
            ),
        ),
        (None, "UTF-8"),
        ("UTF-8", "UTF-8"),
    ],
)
def test_encoding_io_shapefile(tmp_path, read_encoding, write_encoding):
    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
    )
    arabic = "العربية"
    cree = "ᓀᐦᐃᔭᐍᐏᐣ"
    mandarin = "中文"
    field_data = [
        np.array([arabic], dtype=object),
        np.array([cree], dtype=object),
        np.array([mandarin], dtype=object),
    ]

    # Field names are longer than 10 bytes and get truncated badly (not at UTF-8
    # character level) by GDAL when output to shapefile, so we have to truncate
    # before writing
    fields = [arabic[:5], cree[:3], mandarin]
    meta = dict(geometry_type="Point", crs="EPSG:4326", encoding=write_encoding)

    filename = tmp_path / "test.shp"
    # NOTE: GDAL automatically creates a cpg file with the encoding name, which
    # means that if we read this without specifying the encoding it uses the
    # correct one
    write(filename, geometry, field_data, fields, **meta)

    actual_meta, _, _, actual_field_data = read(filename, encoding=read_encoding)
    assert np.array_equal(fields, actual_meta["fields"])
    assert np.array_equal(field_data, actual_field_data)
    assert np.array_equal(fields, read_info(filename, encoding=read_encoding)["fields"])

    # verify that if the cpg file is not present, the user-provided encoding is
    # used; otherwise it defaults to ISO-8859-1
    if read_encoding is not None:
        filename.with_suffix(".cpg").unlink()
        actual_meta, _, _, actual_field_data = read(filename, encoding=read_encoding)
        assert np.array_equal(fields, actual_meta["fields"])
        assert np.array_equal(field_data, actual_field_data)
        assert np.array_equal(
            fields, read_info(filename, encoding=read_encoding)["fields"]
        )


@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
    """Verify that we write non-UTF-8 data to the data source.

    IMPORTANT: this may not be valid for the data source and will likely render
    it unusable in other tools, but should successfully roundtrip unless we
    disable writing using other encodings.

    NOTE: the FlatGeobuf driver cannot handle non-UTF-8 data in GDAL >= 3.9
    """
    encoding, text = encoded_text

    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
    )

    field_data = [np.array([text], dtype=object)]

    fields = [text]
    meta = dict(geometry_type="Point", crs="EPSG:4326", encoding=encoding)

    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, fields, **meta)

    # cannot open these files without specifying encoding
    with pytest.raises(UnicodeDecodeError):
        read(filename)

    with pytest.raises(UnicodeDecodeError):
        read_info(filename)

    # must provide encoding to read these properly
    actual_meta, _, _, actual_field_data = read(filename, encoding=encoding)
    assert actual_meta["fields"][0] == text
    assert actual_field_data[0] == text
    assert read_info(filename, encoding=encoding)["fields"][0] == text


def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text):
    encoding, text = encoded_text

    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
    )

    field_data = [np.array([text], dtype=object)]

    fields = [text]
    meta = dict(geometry_type="Point", crs="EPSG:4326", encoding=encoding)

    filename = tmp_path / "test.shp"
    write(filename, geometry, field_data, fields, **meta)

    # NOTE: GDAL automatically creates a cpg file with the encoding name, which
    # means that if we read this without specifying the encoding it uses the
    # correct one
    actual_meta, _, _, actual_field_data = read(filename)
    assert actual_meta["fields"][0] == text
    assert actual_field_data[0] == text
    assert read_info(filename)["fields"][0] == text

    # verify that if the cpg file is not present, the user-provided encoding must
    # be used
    filename.with_suffix(".cpg").unlink()

    # We will assume ISO-8859-1, which is wrong
    miscoded = text.encode(encoding).decode("ISO-8859-1")
    bad_meta, _, _, bad_field_data = read(filename)
    assert bad_meta["fields"][0] == miscoded
    assert bad_field_data[0] == miscoded
    assert read_info(filename)["fields"][0] == miscoded

    # If encoding is provided, that should yield correct text
    actual_meta, _, _, actual_field_data = read(filename, encoding=encoding)
    assert actual_meta["fields"][0] == text
    assert actual_field_data[0] == text
    assert read_info(filename, encoding=encoding)["fields"][0] == text

    # verify that setting encoding does not corrupt SHAPE_ENCODING option if set
    # globally (it is ignored during read when encoding is specified by user)
    try:
        set_gdal_config_options({"SHAPE_ENCODING": "CP1254"})
        _ = read(filename, encoding=encoding)
        assert get_gdal_config_option("SHAPE_ENCODING") == "CP1254"

    finally:
        # reset to clear between tests
        set_gdal_config_options({"SHAPE_ENCODING": None})


def test_write_with_mask(tmp_path):
    # Point(0, 0) x 3
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 3,
        dtype=object,
    )
    field_data = [np.array([1, 2, 3], dtype="int32")]
    field_mask = [np.array([False, True, False])]
    fields = ["col"]
    meta = dict(geometry_type="Point", crs="EPSG:4326")

    filename = tmp_path / "test.geojson"
    write(filename, geometry, field_data, fields, field_mask, **meta)
    result_geometry, result_fields = read(filename)[2:]
    assert np.array_equal(result_geometry, geometry)
    np.testing.assert_allclose(result_fields[0], np.array([1, np.nan, 3]))

    # wrong length for mask
    field_mask = [np.array([False, True])]
    with pytest.raises(ValueError):
        write(filename, geometry, field_data, fields, field_mask, **meta)

    # wrong number of mask arrays
    field_mask = [np.array([False, True, False])] * 2
    with pytest.raises(ValueError):
        write(filename, geometry, field_data, fields, field_mask, **meta)


@requires_arrow_api
def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres):
    # this test is included here instead of test_arrow.py to ensure we also run
    # it when pyarrow is not installed

    with open_arrow(naturalearth_lowres) as (meta, reader):
        assert isinstance(meta, dict)
        assert isinstance(reader, pyogrio._io._ArrowStream)
        capsule = reader.__arrow_c_stream__()
        assert (
            ctypes.pythonapi.PyCapsule_IsValid(
                ctypes.py_object(capsule), b"arrow_array_stream"
            )
            == 1
        )


@pytest.mark.skipif(HAS_PYARROW, reason="pyarrow is installed")
@requires_arrow_api
def test_open_arrow_error_no_pyarrow(naturalearth_lowres):
    # this test is included here instead of test_arrow.py to ensure we run
    # it when pyarrow is not installed

    with pytest.raises(ImportError):
        with open_arrow(naturalearth_lowres, use_pyarrow=True) as _:
            pass