pyogrio-0.12.0-cp314-cp314t-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. pyogrio/.dylibs/libgdal.37.3.11.4.dylib +0 -0
  2. pyogrio/__init__.py +57 -0
  3. pyogrio/_compat.py +54 -0
  4. pyogrio/_env.py +59 -0
  5. pyogrio/_err.cpython-314t-darwin.so +0 -0
  6. pyogrio/_geometry.cpython-314t-darwin.so +0 -0
  7. pyogrio/_io.cpython-314t-darwin.so +0 -0
  8. pyogrio/_ogr.cpython-314t-darwin.so +0 -0
  9. pyogrio/_version.py +21 -0
  10. pyogrio/_vsi.cpython-314t-darwin.so +0 -0
  11. pyogrio/core.py +387 -0
  12. pyogrio/errors.py +25 -0
  13. pyogrio/gdal_data/GDAL-targets-release.cmake +19 -0
  14. pyogrio/gdal_data/GDAL-targets.cmake +106 -0
  15. pyogrio/gdal_data/GDALConfig.cmake +24 -0
  16. pyogrio/gdal_data/GDALConfigVersion.cmake +65 -0
  17. pyogrio/gdal_data/GDALLogoBW.svg +138 -0
  18. pyogrio/gdal_data/GDALLogoColor.svg +126 -0
  19. pyogrio/gdal_data/GDALLogoGS.svg +126 -0
  20. pyogrio/gdal_data/LICENSE.TXT +467 -0
  21. pyogrio/gdal_data/MM_m_idofic.csv +321 -0
  22. pyogrio/gdal_data/copyright +467 -0
  23. pyogrio/gdal_data/cubewerx_extra.wkt +48 -0
  24. pyogrio/gdal_data/default.rsc +0 -0
  25. pyogrio/gdal_data/ecw_cs.wkt +1453 -0
  26. pyogrio/gdal_data/eedaconf.json +23 -0
  27. pyogrio/gdal_data/epsg.wkt +1 -0
  28. pyogrio/gdal_data/esri_StatePlane_extra.wkt +631 -0
  29. pyogrio/gdal_data/gdal_algorithm.schema.json +220 -0
  30. pyogrio/gdal_data/gdalg.schema.json +36 -0
  31. pyogrio/gdal_data/gdalicon.png +0 -0
  32. pyogrio/gdal_data/gdalinfo_output.schema.json +390 -0
  33. pyogrio/gdal_data/gdalmdiminfo_output.schema.json +326 -0
  34. pyogrio/gdal_data/gdaltileindex.xsd +253 -0
  35. pyogrio/gdal_data/gdalvrt.xsd +927 -0
  36. pyogrio/gdal_data/gfs.xsd +246 -0
  37. pyogrio/gdal_data/gml_registry.xml +117 -0
  38. pyogrio/gdal_data/gml_registry.xsd +66 -0
  39. pyogrio/gdal_data/grib2_center.csv +251 -0
  40. pyogrio/gdal_data/grib2_process.csv +102 -0
  41. pyogrio/gdal_data/grib2_subcenter.csv +63 -0
  42. pyogrio/gdal_data/grib2_table_4_2_0_0.csv +261 -0
  43. pyogrio/gdal_data/grib2_table_4_2_0_1.csv +261 -0
  44. pyogrio/gdal_data/grib2_table_4_2_0_13.csv +261 -0
  45. pyogrio/gdal_data/grib2_table_4_2_0_14.csv +261 -0
  46. pyogrio/gdal_data/grib2_table_4_2_0_15.csv +261 -0
  47. pyogrio/gdal_data/grib2_table_4_2_0_16.csv +261 -0
  48. pyogrio/gdal_data/grib2_table_4_2_0_17.csv +11 -0
  49. pyogrio/gdal_data/grib2_table_4_2_0_18.csv +261 -0
  50. pyogrio/gdal_data/grib2_table_4_2_0_19.csv +261 -0
  51. pyogrio/gdal_data/grib2_table_4_2_0_190.csv +261 -0
  52. pyogrio/gdal_data/grib2_table_4_2_0_191.csv +261 -0
  53. pyogrio/gdal_data/grib2_table_4_2_0_2.csv +261 -0
  54. pyogrio/gdal_data/grib2_table_4_2_0_20.csv +261 -0
  55. pyogrio/gdal_data/grib2_table_4_2_0_21.csv +261 -0
  56. pyogrio/gdal_data/grib2_table_4_2_0_3.csv +261 -0
  57. pyogrio/gdal_data/grib2_table_4_2_0_4.csv +261 -0
  58. pyogrio/gdal_data/grib2_table_4_2_0_5.csv +261 -0
  59. pyogrio/gdal_data/grib2_table_4_2_0_6.csv +261 -0
  60. pyogrio/gdal_data/grib2_table_4_2_0_7.csv +261 -0
  61. pyogrio/gdal_data/grib2_table_4_2_10_0.csv +261 -0
  62. pyogrio/gdal_data/grib2_table_4_2_10_1.csv +261 -0
  63. pyogrio/gdal_data/grib2_table_4_2_10_191.csv +261 -0
  64. pyogrio/gdal_data/grib2_table_4_2_10_2.csv +261 -0
  65. pyogrio/gdal_data/grib2_table_4_2_10_3.csv +261 -0
  66. pyogrio/gdal_data/grib2_table_4_2_10_4.csv +261 -0
  67. pyogrio/gdal_data/grib2_table_4_2_1_0.csv +261 -0
  68. pyogrio/gdal_data/grib2_table_4_2_1_1.csv +261 -0
  69. pyogrio/gdal_data/grib2_table_4_2_1_2.csv +261 -0
  70. pyogrio/gdal_data/grib2_table_4_2_20_0.csv +261 -0
  71. pyogrio/gdal_data/grib2_table_4_2_20_1.csv +261 -0
  72. pyogrio/gdal_data/grib2_table_4_2_20_2.csv +261 -0
  73. pyogrio/gdal_data/grib2_table_4_2_2_0.csv +261 -0
  74. pyogrio/gdal_data/grib2_table_4_2_2_3.csv +261 -0
  75. pyogrio/gdal_data/grib2_table_4_2_2_4.csv +261 -0
  76. pyogrio/gdal_data/grib2_table_4_2_2_5.csv +261 -0
  77. pyogrio/gdal_data/grib2_table_4_2_2_6.csv +261 -0
  78. pyogrio/gdal_data/grib2_table_4_2_3_0.csv +261 -0
  79. pyogrio/gdal_data/grib2_table_4_2_3_1.csv +261 -0
  80. pyogrio/gdal_data/grib2_table_4_2_3_2.csv +28 -0
  81. pyogrio/gdal_data/grib2_table_4_2_3_3.csv +8 -0
  82. pyogrio/gdal_data/grib2_table_4_2_3_4.csv +14 -0
  83. pyogrio/gdal_data/grib2_table_4_2_3_5.csv +11 -0
  84. pyogrio/gdal_data/grib2_table_4_2_3_6.csv +11 -0
  85. pyogrio/gdal_data/grib2_table_4_2_4_0.csv +261 -0
  86. pyogrio/gdal_data/grib2_table_4_2_4_1.csv +261 -0
  87. pyogrio/gdal_data/grib2_table_4_2_4_10.csv +261 -0
  88. pyogrio/gdal_data/grib2_table_4_2_4_2.csv +261 -0
  89. pyogrio/gdal_data/grib2_table_4_2_4_3.csv +261 -0
  90. pyogrio/gdal_data/grib2_table_4_2_4_4.csv +261 -0
  91. pyogrio/gdal_data/grib2_table_4_2_4_5.csv +261 -0
  92. pyogrio/gdal_data/grib2_table_4_2_4_6.csv +261 -0
  93. pyogrio/gdal_data/grib2_table_4_2_4_7.csv +261 -0
  94. pyogrio/gdal_data/grib2_table_4_2_4_8.csv +261 -0
  95. pyogrio/gdal_data/grib2_table_4_2_4_9.csv +261 -0
  96. pyogrio/gdal_data/grib2_table_4_2_local_Canada.csv +5 -0
  97. pyogrio/gdal_data/grib2_table_4_2_local_HPC.csv +2 -0
  98. pyogrio/gdal_data/grib2_table_4_2_local_MRMS.csv +175 -0
  99. pyogrio/gdal_data/grib2_table_4_2_local_NCEP.csv +401 -0
  100. pyogrio/gdal_data/grib2_table_4_2_local_NDFD.csv +38 -0
  101. pyogrio/gdal_data/grib2_table_4_2_local_index.csv +7 -0
  102. pyogrio/gdal_data/grib2_table_4_5.csv +261 -0
  103. pyogrio/gdal_data/grib2_table_versions.csv +3 -0
  104. pyogrio/gdal_data/gt_datum.csv +229 -0
  105. pyogrio/gdal_data/gt_ellips.csv +24 -0
  106. pyogrio/gdal_data/header.dxf +1124 -0
  107. pyogrio/gdal_data/inspire_cp_BasicPropertyUnit.gfs +57 -0
  108. pyogrio/gdal_data/inspire_cp_CadastralBoundary.gfs +60 -0
  109. pyogrio/gdal_data/inspire_cp_CadastralParcel.gfs +81 -0
  110. pyogrio/gdal_data/inspire_cp_CadastralZoning.gfs +161 -0
  111. pyogrio/gdal_data/jpfgdgml_AdmArea.gfs +59 -0
  112. pyogrio/gdal_data/jpfgdgml_AdmBdry.gfs +49 -0
  113. pyogrio/gdal_data/jpfgdgml_AdmPt.gfs +59 -0
  114. pyogrio/gdal_data/jpfgdgml_BldA.gfs +54 -0
  115. pyogrio/gdal_data/jpfgdgml_BldL.gfs +54 -0
  116. pyogrio/gdal_data/jpfgdgml_Cntr.gfs +54 -0
  117. pyogrio/gdal_data/jpfgdgml_CommBdry.gfs +49 -0
  118. pyogrio/gdal_data/jpfgdgml_CommPt.gfs +59 -0
  119. pyogrio/gdal_data/jpfgdgml_Cstline.gfs +54 -0
  120. pyogrio/gdal_data/jpfgdgml_ElevPt.gfs +54 -0
  121. pyogrio/gdal_data/jpfgdgml_GCP.gfs +94 -0
  122. pyogrio/gdal_data/jpfgdgml_LeveeEdge.gfs +49 -0
  123. pyogrio/gdal_data/jpfgdgml_RailCL.gfs +54 -0
  124. pyogrio/gdal_data/jpfgdgml_RdASL.gfs +44 -0
  125. pyogrio/gdal_data/jpfgdgml_RdArea.gfs +54 -0
  126. pyogrio/gdal_data/jpfgdgml_RdCompt.gfs +59 -0
  127. pyogrio/gdal_data/jpfgdgml_RdEdg.gfs +59 -0
  128. pyogrio/gdal_data/jpfgdgml_RdMgtBdry.gfs +49 -0
  129. pyogrio/gdal_data/jpfgdgml_RdSgmtA.gfs +59 -0
  130. pyogrio/gdal_data/jpfgdgml_RvrMgtBdry.gfs +49 -0
  131. pyogrio/gdal_data/jpfgdgml_SBAPt.gfs +49 -0
  132. pyogrio/gdal_data/jpfgdgml_SBArea.gfs +54 -0
  133. pyogrio/gdal_data/jpfgdgml_SBBdry.gfs +44 -0
  134. pyogrio/gdal_data/jpfgdgml_WA.gfs +54 -0
  135. pyogrio/gdal_data/jpfgdgml_WL.gfs +54 -0
  136. pyogrio/gdal_data/jpfgdgml_WStrA.gfs +54 -0
  137. pyogrio/gdal_data/jpfgdgml_WStrL.gfs +54 -0
  138. pyogrio/gdal_data/leaflet_template.html +102 -0
  139. pyogrio/gdal_data/nitf_spec.xml +3288 -0
  140. pyogrio/gdal_data/nitf_spec.xsd +171 -0
  141. pyogrio/gdal_data/ogr_fields_override.schema.json +125 -0
  142. pyogrio/gdal_data/ogrinfo_output.schema.json +528 -0
  143. pyogrio/gdal_data/ogrvrt.xsd +528 -0
  144. pyogrio/gdal_data/osmconf.ini +134 -0
  145. pyogrio/gdal_data/ozi_datum.csv +131 -0
  146. pyogrio/gdal_data/ozi_ellips.csv +35 -0
  147. pyogrio/gdal_data/pci_datum.txt +530 -0
  148. pyogrio/gdal_data/pci_ellips.txt +129 -0
  149. pyogrio/gdal_data/pdfcomposition.xsd +703 -0
  150. pyogrio/gdal_data/pds4_template.xml +65 -0
  151. pyogrio/gdal_data/plscenesconf.json +1985 -0
  152. pyogrio/gdal_data/ruian_vf_ob_v1.gfs +1455 -0
  153. pyogrio/gdal_data/ruian_vf_st_uvoh_v1.gfs +86 -0
  154. pyogrio/gdal_data/ruian_vf_st_v1.gfs +1489 -0
  155. pyogrio/gdal_data/ruian_vf_v1.gfs +2126 -0
  156. pyogrio/gdal_data/s57agencies.csv +249 -0
  157. pyogrio/gdal_data/s57attributes.csv +484 -0
  158. pyogrio/gdal_data/s57expectedinput.csv +1008 -0
  159. pyogrio/gdal_data/s57objectclasses.csv +287 -0
  160. pyogrio/gdal_data/seed_2d.dgn +0 -0
  161. pyogrio/gdal_data/seed_3d.dgn +0 -0
  162. pyogrio/gdal_data/stateplane.csv +259 -0
  163. pyogrio/gdal_data/template_tiles.mapml +28 -0
  164. pyogrio/gdal_data/tms_LINZAntarticaMapTileGrid.json +190 -0
  165. pyogrio/gdal_data/tms_MapML_APSTILE.json +268 -0
  166. pyogrio/gdal_data/tms_MapML_CBMTILE.json +346 -0
  167. pyogrio/gdal_data/tms_NZTM2000.json +243 -0
  168. pyogrio/gdal_data/trailer.dxf +434 -0
  169. pyogrio/gdal_data/usage +4 -0
  170. pyogrio/gdal_data/vcpkg-cmake-wrapper.cmake +23 -0
  171. pyogrio/gdal_data/vcpkg.spdx.json +291 -0
  172. pyogrio/gdal_data/vcpkg_abi_info.txt +45 -0
  173. pyogrio/gdal_data/vdv452.xml +349 -0
  174. pyogrio/gdal_data/vdv452.xsd +45 -0
  175. pyogrio/gdal_data/vicar.json +164 -0
  176. pyogrio/geopandas.py +978 -0
  177. pyogrio/proj_data/CH +22 -0
  178. pyogrio/proj_data/GL27 +23 -0
  179. pyogrio/proj_data/ITRF2000 +24 -0
  180. pyogrio/proj_data/ITRF2008 +94 -0
  181. pyogrio/proj_data/ITRF2014 +55 -0
  182. pyogrio/proj_data/ITRF2020 +91 -0
  183. pyogrio/proj_data/copyright +34 -0
  184. pyogrio/proj_data/deformation_model.schema.json +582 -0
  185. pyogrio/proj_data/nad.lst +142 -0
  186. pyogrio/proj_data/nad27 +810 -0
  187. pyogrio/proj_data/nad83 +745 -0
  188. pyogrio/proj_data/other.extra +53 -0
  189. pyogrio/proj_data/proj-config-version.cmake +44 -0
  190. pyogrio/proj_data/proj-config.cmake +79 -0
  191. pyogrio/proj_data/proj-targets-release.cmake +19 -0
  192. pyogrio/proj_data/proj-targets.cmake +107 -0
  193. pyogrio/proj_data/proj.db +0 -0
  194. pyogrio/proj_data/proj.ini +59 -0
  195. pyogrio/proj_data/proj4-targets-release.cmake +19 -0
  196. pyogrio/proj_data/proj4-targets.cmake +107 -0
  197. pyogrio/proj_data/projjson.schema.json +1174 -0
  198. pyogrio/proj_data/triangulation.schema.json +214 -0
  199. pyogrio/proj_data/usage +9 -0
  200. pyogrio/proj_data/vcpkg.spdx.json +203 -0
  201. pyogrio/proj_data/vcpkg_abi_info.txt +28 -0
  202. pyogrio/proj_data/world +214 -0
  203. pyogrio/raw.py +897 -0
  204. pyogrio/tests/__init__.py +0 -0
  205. pyogrio/tests/conftest.py +588 -0
  206. pyogrio/tests/fixtures/README.md +108 -0
  207. pyogrio/tests/fixtures/curve.gpkg +0 -0
  208. pyogrio/tests/fixtures/curvepolygon.gpkg +0 -0
  209. pyogrio/tests/fixtures/line_zm.gpkg +0 -0
  210. pyogrio/tests/fixtures/list_field_values_file.parquet +0 -0
  211. pyogrio/tests/fixtures/list_nested_struct_file.parquet +0 -0
  212. pyogrio/tests/fixtures/multisurface.gpkg +0 -0
  213. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.cpg +1 -0
  214. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf +0 -0
  215. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.prj +1 -0
  216. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp +0 -0
  217. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx +0 -0
  218. pyogrio/tests/fixtures/sample.osm.pbf +0 -0
  219. pyogrio/tests/fixtures/test_gpkg_nulls.gpkg +0 -0
  220. pyogrio/tests/test_arrow.py +1160 -0
  221. pyogrio/tests/test_core.py +702 -0
  222. pyogrio/tests/test_geopandas_io.py +3218 -0
  223. pyogrio/tests/test_path.py +374 -0
  224. pyogrio/tests/test_raw_io.py +1473 -0
  225. pyogrio/tests/test_util.py +56 -0
  226. pyogrio/util.py +258 -0
  227. pyogrio-0.12.0.dist-info/METADATA +125 -0
  228. pyogrio-0.12.0.dist-info/RECORD +231 -0
  229. pyogrio-0.12.0.dist-info/WHEEL +6 -0
  230. pyogrio-0.12.0.dist-info/licenses/LICENSE +21 -0
  231. pyogrio-0.12.0.dist-info/top_level.txt +1 -0
pyogrio/tests/test_raw_io.py
@@ -0,0 +1,1473 @@
+ import contextlib
+ import ctypes
+ import json
+ import sys
+ from io import BytesIO
+ from zipfile import ZipFile
+
+ import numpy as np
+ from numpy import array_equal
+
+ import pyogrio
+ from pyogrio import (
+     __gdal_version__,
+     get_gdal_config_option,
+     list_drivers,
+     list_layers,
+     read_info,
+     set_gdal_config_options,
+ )
+ from pyogrio._compat import GDAL_GE_37, HAS_PYARROW, HAS_SHAPELY
+ from pyogrio.errors import DataLayerError, DataSourceError, FeatureError
+ from pyogrio.raw import open_arrow, read, write
+ from pyogrio.tests.conftest import (
+     DRIVER_EXT,
+     DRIVERS,
+     prepare_testfile,
+     requires_pyarrow_api,
+     requires_shapely,
+ )
+
+ import pytest
+
+ try:
+     import shapely
+ except ImportError:
+     pass
+
+
+ def test_read(naturalearth_lowres):
+     meta, _, geometry, fields = read(naturalearth_lowres)
+
+     assert meta["crs"] == "EPSG:4326"
+     assert meta["geometry_type"] == "Polygon"
+     assert meta["encoding"] == "UTF-8"
+     assert meta["fields"].shape == (5,)
+
+     assert meta["fields"].tolist() == [
+         "pop_est",
+         "continent",
+         "name",
+         "iso_a3",
+         "gdp_md_est",
+     ]
+
+     assert len(fields) == 5
+     assert len(geometry) == len(fields[0])
+
+     # quick test that WKB is a MultiPolygon type
+     assert geometry[0][:6] == b"\x01\x06\x00\x00\x00\x03"
+
+
+ @pytest.mark.parametrize("ext", DRIVERS)
+ def test_read_autodetect_driver(tmp_path, naturalearth_lowres, ext):
+     # Test all supported autodetect drivers
+     if ext == ".gpkg.zip" and not GDAL_GE_37:
+         pytest.skip(".gpkg.zip not supported for gdal < 3.7.0")
+     testfile = prepare_testfile(naturalearth_lowres, dst_dir=tmp_path, ext=ext)
+
+     assert testfile.exists()
+     meta, _, geometry, fields = read(testfile)
+
+     assert meta["crs"] == "EPSG:4326"
+     assert meta["geometry_type"] in ("MultiPolygon", "Polygon", "Unknown")
+     assert meta["encoding"] == "UTF-8"
+     assert meta["fields"].shape == (5,)
+
+     assert meta["fields"].tolist() == [
+         "pop_est",
+         "continent",
+         "name",
+         "iso_a3",
+         "gdp_md_est",
+     ]
+
+     assert len(fields) == 5
+     assert len(geometry) == len(fields[0])
+
+
+ def test_read_arrow_unspecified_layer_warning(data_dir):
+     """Reading a multi-layer file without specifying a layer gives a warning."""
+     with pytest.warns(UserWarning, match="More than one layer found "):
+         read(data_dir / "sample.osm.pbf")
+
+
+ def test_read_invalid_layer(naturalearth_lowres):
+     with pytest.raises(DataLayerError, match="Layer 'invalid' could not be opened"):
+         read(naturalearth_lowres, layer="invalid")
+
+     with pytest.raises(DataLayerError, match="Layer '-1' could not be opened"):
+         read(naturalearth_lowres, layer=-1)
+
+     with pytest.raises(DataLayerError, match="Layer '2' could not be opened"):
+         read(naturalearth_lowres, layer=2)
+
+
+ def test_vsi_read_layers(naturalearth_lowres_vsi):
+     _, naturalearth_lowres_vsi = naturalearth_lowres_vsi
+     assert array_equal(
+         list_layers(naturalearth_lowres_vsi), [["naturalearth_lowres", "Polygon"]]
+     )
+
+     geometry = read(naturalearth_lowres_vsi)[2]
+     assert geometry.shape == (177,)
+
+
+ def test_read_no_geometry(naturalearth_lowres):
+     geometry = read(naturalearth_lowres, read_geometry=False)[2]
+
+     assert geometry is None
+
+
+ @requires_shapely
+ def test_read_no_geometry__mask(naturalearth_lowres):
+     geometry, fields = read(
+         naturalearth_lowres,
+         read_geometry=False,
+         mask=shapely.Point(-105, 55),
+     )[2:]
+
+     assert np.array_equal(fields[3], ["CAN"])
+     assert geometry is None
+
+
+ def test_read_no_geometry__bbox(naturalearth_lowres):
+     geometry, fields = read(
+         naturalearth_lowres,
+         read_geometry=False,
+         bbox=(-109.0, 55.0, -109.0, 55.0),
+     )[2:]
+
+     assert np.array_equal(fields[3], ["CAN"])
+     assert geometry is None
+
+
+ def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres):
+     with pytest.raises(
+         ValueError,
+         match=(
+             "at least one of read_geometry or return_fids must be True or columns must "
+             "be None or non-empty"
+         ),
+     ):
+         _ = read(
+             naturalearth_lowres, columns=[], read_geometry=False, return_fids=False
+         )
+
+
+ def test_read_columns(naturalearth_lowres):
+     columns = ["NAME", "NAME_LONG"]
+     meta, _, geometry, fields = read(
+         naturalearth_lowres, columns=columns, read_geometry=False
+     )
+     array_equal(meta["fields"], columns)
+
+     # Repeats should be dropped
+     columns = ["NAME", "NAME_LONG", "NAME"]
+     meta, _, geometry, fields = read(
+         naturalearth_lowres, columns=columns, read_geometry=False
+     )
+     array_equal(meta["fields"], columns[:2])
+
+
+ @pytest.mark.parametrize("skip_features", [10, 200])
+ def test_read_skip_features(naturalearth_lowres_all_ext, skip_features):
+     expected_geometry, expected_fields = read(naturalearth_lowres_all_ext)[2:]
+     geometry, fields = read(naturalearth_lowres_all_ext, skip_features=skip_features)[
+         2:
+     ]
+
+     # skipping more features than available in layer returns empty arrays
+     expected_count = max(len(expected_geometry) - skip_features, 0)
+
+     assert len(geometry) == expected_count
+     assert len(fields[0]) == expected_count
+
+     assert np.array_equal(geometry, expected_geometry[skip_features:])
+     # Last field has more variable data
+     assert np.array_equal(fields[-1], expected_fields[-1][skip_features:])
+
+
+ def test_read_negative_skip_features(naturalearth_lowres):
+     with pytest.raises(ValueError, match="'skip_features' must be >= 0"):
+         read(naturalearth_lowres, skip_features=-1)
+
+
+ def test_read_max_features(naturalearth_lowres):
+     expected_geometry, expected_fields = read(naturalearth_lowres)[2:]
+     geometry, fields = read(naturalearth_lowres, max_features=2)[2:]
+
+     assert len(geometry) == 2
+     assert len(fields[0]) == 2
+
+     assert np.array_equal(geometry, expected_geometry[:2])
+     assert np.array_equal(fields[-1], expected_fields[-1][:2])
+
+
+ def test_read_negative_max_features(naturalearth_lowres):
+     with pytest.raises(ValueError, match="'max_features' must be >= 0"):
+         read(naturalearth_lowres, max_features=-1)
+
+
+ def test_read_where(naturalearth_lowres):
+     # empty filter should return full set of records
+     geometry, fields = read(naturalearth_lowres, where="")[2:]
+     assert len(geometry) == 177
+     assert len(fields) == 5
+     assert len(fields[0]) == 177
+
+     # should return singular item
+     geometry, fields = read(naturalearth_lowres, where="iso_a3 = 'CAN'")[2:]
+     assert len(geometry) == 1
+     assert len(fields) == 5
+     assert len(fields[0]) == 1
+     assert fields[3] == "CAN"
+
+     # should return items within range
+     geometry, fields = read(
+         naturalearth_lowres, where="POP_EST >= 10000000 AND POP_EST < 100000000"
+     )[2:]
+     assert len(geometry) == 75
+     assert min(fields[0]) >= 10000000
+     assert max(fields[0]) < 100000000
+
+     # should match no items
+     geometry, fields = read(naturalearth_lowres, where="iso_a3 = 'INVALID'")[2:]
+     assert len(geometry) == 0
+
+
+ def test_read_where_invalid(naturalearth_lowres):
+     with pytest.raises(ValueError, match="Invalid SQL"):
+         read(naturalearth_lowres, where="invalid")
+
+
+ @pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
+ def test_read_bbox_invalid(naturalearth_lowres, bbox):
+     with pytest.raises(ValueError, match="Invalid bbox"):
+         read(naturalearth_lowres, bbox=bbox)
+
+
+ def test_read_bbox(naturalearth_lowres_all_ext):
+     # should return no features
+     geometry, fields = read(naturalearth_lowres_all_ext, bbox=(0, 0, 0.00001, 0.00001))[
+         2:
+     ]
+
+     assert len(geometry) == 0
+
+     geometry, fields = read(naturalearth_lowres_all_ext, bbox=(-85, 8, -80, 10))[2:]
+
+     assert len(geometry) == 2
+     assert np.array_equal(fields[3], ["PAN", "CRI"])
+
+
+ def test_read_bbox_sql(naturalearth_lowres_all_ext):
+     fields = read(
+         naturalearth_lowres_all_ext,
+         bbox=(-180, 50, -100, 90),
+         sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
+     )[3]
+     assert len(fields[3]) == 1
+     assert np.array_equal(fields[3], ["CAN"])
+
+
+ def test_read_bbox_where(naturalearth_lowres_all_ext):
+     fields = read(
+         naturalearth_lowres_all_ext,
+         bbox=(-180, 50, -100, 90),
+         where="iso_a3 not in ('USA', 'RUS')",
+     )[3]
+     assert len(fields[3]) == 1
+     assert np.array_equal(fields[3], ["CAN"])
+
+
+ @requires_shapely
+ @pytest.mark.parametrize(
+     "mask",
+     [
+         {"type": "Point", "coordinates": [0, 0]},
+         '{"type": "Point", "coordinates": [0, 0]}',
+         "invalid",
+     ],
+ )
+ def test_read_mask_invalid(naturalearth_lowres, mask):
+     with pytest.raises(ValueError, match="'mask' parameter must be a Shapely geometry"):
+         read(naturalearth_lowres, mask=mask)
+
+
+ @requires_shapely
+ def test_read_bbox_mask_invalid(naturalearth_lowres):
+     with pytest.raises(ValueError, match="cannot set both 'bbox' and 'mask'"):
+         read(naturalearth_lowres, bbox=(-85, 8, -80, 10), mask=shapely.Point(-105, 55))
+
+
+ @requires_shapely
+ @pytest.mark.parametrize(
+     "mask,expected",
+     [
+         ("POINT (-105 55)", ["CAN"]),
+         ("POLYGON ((-80 8, -80 10, -85 10, -85 8, -80 8))", ["PAN", "CRI"]),
+         (
+             """POLYGON ((
+                 6.101929 50.97085,
+                 5.773002 50.906611,
+                 5.593156 50.642649,
+                 6.059271 50.686052,
+                 6.374064 50.851481,
+                 6.101929 50.97085
+             ))""",
+             ["DEU", "BEL", "NLD"],
+         ),
+         (
+             """GEOMETRYCOLLECTION (
+                 POINT (-7.7 53),
+                 POLYGON ((-80 8, -80 10, -85 10, -85 8, -80 8))
+             )""",
+             ["PAN", "CRI", "IRL"],
+         ),
+     ],
+ )
+ def test_read_mask(naturalearth_lowres_all_ext, mask, expected):
+     mask = shapely.from_wkt(mask)
+
+     geometry, fields = read(naturalearth_lowres_all_ext, mask=mask)[2:]
+
+     assert np.array_equal(fields[3], expected)
+     assert len(geometry) == len(expected)
+
+
+ @requires_shapely
+ def test_read_mask_sql(naturalearth_lowres_all_ext):
+     fields = read(
+         naturalearth_lowres_all_ext,
+         mask=shapely.box(-180, 50, -100, 90),
+         sql="SELECT * from naturalearth_lowres where iso_a3 not in ('USA', 'RUS')",
+     )[3]
+     assert len(fields[3]) == 1
+     assert np.array_equal(fields[3], ["CAN"])
+
+
+ @requires_shapely
+ def test_read_mask_where(naturalearth_lowres_all_ext):
+     fields = read(
+         naturalearth_lowres_all_ext,
+         mask=shapely.box(-180, 50, -100, 90),
+         where="iso_a3 not in ('USA', 'RUS')",
+     )[3]
+     assert len(fields[3]) == 1
+     assert np.array_equal(fields[3], ["CAN"])
+
+
+ def test_read_fids(naturalearth_lowres):
+     expected_fids, expected_geometry, expected_fields = read(
+         naturalearth_lowres, return_fids=True
+     )[1:]
+     subset = [0, 10, 5]
+
+     for fids in [subset, np.array(subset)]:
+         index, geometry, fields = read(
+             naturalearth_lowres, fids=fids, return_fids=True
+         )[1:]
+
+         assert len(fids) == 3
+         assert len(geometry) == 3
+         assert len(fields[0]) == 3
+
+         assert np.array_equal(index, expected_fids[subset])
+         assert np.array_equal(geometry, expected_geometry[subset])
+         assert np.array_equal(fields[-1], expected_fields[-1][subset])
+
+
+ def test_read_fids_out_of_bounds(naturalearth_lowres):
+     with pytest.raises(
+         FeatureError,
+         match=r"Attempt to read shape with feature id \(-1\) out of available range",
+     ):
+         read(naturalearth_lowres, fids=[-1])
+
+     with pytest.raises(
+         FeatureError,
+         match=r"Attempt to read shape with feature id \(200\) out of available range",
+     ):
+         read(naturalearth_lowres, fids=[200])
+
+
+ def test_read_fids_unsupported_keywords(naturalearth_lowres):
+     with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
+         read(naturalearth_lowres, fids=[1], where="iso_a3 = 'CAN'")
+
+     with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
+         read(naturalearth_lowres, fids=[1], bbox=(-140, 20, -100, 45))
+
+     with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
+         read(naturalearth_lowres, fids=[1], skip_features=5)
+
+     with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
+         read(naturalearth_lowres, fids=[1], max_features=5)
+
+     with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
+         read(naturalearth_lowres, fids=[1], bbox=(0, 0, 0.0001, 0.0001))
+
+     if HAS_SHAPELY:
+         with pytest.raises(ValueError, match="cannot set both 'fids' and any of"):
+             read(naturalearth_lowres, fids=[1], mask=shapely.Point(0, 0))
+
+
+ def test_read_return_fids(naturalearth_lowres):
+     # default is to not return fids
+     fids = read(naturalearth_lowres)[1]
+     assert fids is None
+
+     fids = read(naturalearth_lowres, return_fids=False)[1]
+     assert fids is None
+
+     fids = read(naturalearth_lowres, return_fids=True, skip_features=2, max_features=2)[
+         1
+     ]
+     assert fids is not None
+     assert fids.dtype == np.int64
+     # Note: shapefile FIDS start at 0
+     assert np.array_equal(fids, np.array([2, 3], dtype="int64"))
+
+
+ def test_read_return_only_fids(naturalearth_lowres):
+     _, fids, geometry, field_data = read(
+         naturalearth_lowres, columns=[], read_geometry=False, return_fids=True
+     )
+     assert fids is not None
+     assert len(fids) == 177
+     assert geometry is None
+     assert len(field_data) == 0
+
+
+ @pytest.mark.parametrize("encoding", [None, "ISO-8859-1"])
+ def test_write_shp(tmp_path, naturalearth_lowres, encoding):
+     meta, _, geometry, field_data = read(naturalearth_lowres)
+
+     filename = tmp_path / "test.shp"
+     meta["encoding"] = encoding
+     write(filename, geometry, field_data, **meta)
+
+     assert filename.exists()
+     for ext in (".dbf", ".prj"):
+         assert filename.with_suffix(ext).exists()
+
+     # We write shapefiles in UTF-8 by default on all platforms
+     expected_encoding = encoding if encoding is not None else "UTF-8"
+     with open(filename.with_suffix(".cpg")) as cpg_file:
+         result_encoding = cpg_file.read()
+     assert result_encoding == expected_encoding
+
+
+ def test_write_gpkg(tmp_path, naturalearth_lowres):
+     meta, _, geometry, field_data = read(naturalearth_lowres)
+     meta.update({"geometry_type": "MultiPolygon"})
+
+     filename = tmp_path / "test.gpkg"
+     write(filename, geometry, field_data, driver="GPKG", **meta)
+
+     assert filename.exists()
+
+
+ def test_write_gpkg_multiple_layers(tmp_path, naturalearth_lowres):
+     meta, _, geometry, field_data = read(naturalearth_lowres)
+     meta["geometry_type"] = "MultiPolygon"
+
+     filename = tmp_path / "test.gpkg"
+     write(filename, geometry, field_data, driver="GPKG", layer="first", **meta)
+
+     assert filename.exists()
+
+     assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])
+
+     write(filename, geometry, field_data, driver="GPKG", layer="second", **meta)
+
+     assert np.array_equal(
+         list_layers(filename), [["first", "MultiPolygon"], ["second", "MultiPolygon"]]
+     )
+
+
+ def test_write_geojson(tmp_path, naturalearth_lowres):
+     meta, _, geometry, field_data = read(naturalearth_lowres)
+
+     filename = tmp_path / "test.json"
+     write(filename, geometry, field_data, driver="GeoJSON", **meta)
+
+     assert filename.exists()
+
+     data = json.loads(open(filename).read())
+
+     assert data["type"] == "FeatureCollection"
+     assert data["name"] == "test"
+     assert "crs" in data
+     assert len(data["features"]) == len(geometry)
+     assert not len(
+         set(meta["fields"]).difference(data["features"][0]["properties"].keys())
+     )
+
+
+ def test_write_no_fields(tmp_path, naturalearth_lowres):
+     """Test writing file with no fields/attribute columns."""
+     # Prepare test data
+     meta, _, geometry, field_data = read(naturalearth_lowres)
+     field_data = None
+     meta["fields"] = None
+     # naturalearth_lowres actually contains MultiPolygons. A shapefile doesn't make the
+     # distinction, so the metadata just reports Polygon. GPKG does, so override here to
+     # avoid GDAL warnings.
+     meta["geometry_type"] = "MultiPolygon"
+
+     # Test
+     filename = tmp_path / "test.gpkg"
+     write(filename, geometry, field_data, driver="GPKG", **meta)
+
+     # Check result
+     assert filename.exists()
+     meta, _, geometry, fields = read(filename)
+
+     assert meta["crs"] == "EPSG:4326"
+     assert meta["geometry_type"] == "MultiPolygon"
+     assert meta["encoding"] == "UTF-8"
+     assert meta["fields"].shape == (0,)
+     assert len(fields) == 0
+     assert len(geometry) == 177
+
+     # quick test that WKB is a MultiPolygon type
+     assert geometry[0][:6] == b"\x01\x06\x00\x00\x00\x03"
537
+
538
+
539
+ def test_write_no_geom(tmp_path, naturalearth_lowres):
540
+ """Test writing file with no geometry column."""
541
+ # Prepare test data
542
+ meta, _, geometry, field_data = read(naturalearth_lowres)
543
+ geometry = None
544
+ meta["geometry_type"] = None
545
+
546
+ # Test
547
+ filename = tmp_path / "test.gpkg"
548
+ write(filename, geometry, field_data, driver="GPKG", **meta)
549
+
550
+ # Check result
551
+ assert filename.exists()
552
+ meta, _, geometry, fields = read(filename)
553
+
554
+ assert meta["crs"] is None
555
+ assert meta["geometry_type"] is None
556
+ assert meta["encoding"] == "UTF-8"
557
+ assert meta["fields"].shape == (5,)
558
+
559
+ assert meta["fields"].tolist() == [
560
+ "pop_est",
561
+ "continent",
562
+ "name",
563
+ "iso_a3",
564
+ "gdp_md_est",
565
+ ]
566
+
567
+ assert len(fields) == 5
568
+ assert len(fields[0]) == 177
569
+
570
+
571
+ def test_write_no_geom_data(tmp_path, naturalearth_lowres):
572
+ """Test writing file with no geometry data passed but a geometry_type specified.
573
+
574
+ In this case the geometry_type is ignored, so a file without geometry column is
575
+ written.
576
+ """
577
+ # Prepare test data
578
+ meta, _, geometry, field_data = read(naturalearth_lowres)
579
+ # If geometry data is set to None, meta["geometry_type"] is ignored and so no
580
+ # geometry column will be created.
581
+ geometry = None
582
+
583
+ # Test
584
+ filename = tmp_path / "test.gpkg"
585
+ write(filename, geometry, field_data, driver="GPKG", **meta)
586
+
587
+ # Check result
588
+ assert filename.exists()
589
+ result_meta, _, result_geometry, result_field_data = read(filename)
590
+
591
+ assert result_meta["crs"] is None
592
+ assert result_meta["geometry_type"] is None
593
+ assert result_meta["encoding"] == "UTF-8"
594
+ assert result_meta["fields"].shape == (5,)
595
+
596
+ assert result_meta["fields"].tolist() == [
597
+ "pop_est",
598
+ "continent",
599
+ "name",
600
+ "iso_a3",
601
+ "gdp_md_est",
602
+ ]
603
+
604
+ assert len(result_field_data) == 5
605
+ assert len(result_field_data[0]) == 177
606
+ assert result_geometry is None
607
+
608
+
609
+ def test_write_no_geom_no_fields():
610
+ """Test writing file with no geometry column nor fields -> error."""
611
+ with pytest.raises(
612
+ ValueError,
613
+ match="You must provide at least a geometry column or a field",
614
+ ):
615
+ write("test.gpkg", geometry=None, field_data=None, fields=None)
616
+
617
+
618
+ @pytest.mark.parametrize(
619
+ "write_int64",
620
+ [
621
+ False,
622
+ pytest.param(
623
+ True,
624
+ marks=pytest.mark.skipif(
625
+ __gdal_version__ < (3, 9, 0),
626
+ reason="OpenFileGDB write support for int64 values for GDAL >= 3.9.0",
627
+ ),
628
+ ),
629
+ ],
630
+ )
631
+ def test_write_openfilegdb(tmp_path, write_int64):
632
+ # Point(0, 0)
633
+ expected_geometry = np.array(
634
+ [bytes.fromhex("010100000000000000000000000000000000000000")] * 3, dtype=object
635
+ )
636
+ expected_field_data = [
637
+ np.array([True, False, True], dtype="bool"),
638
+ np.array([1, 2, 3], dtype="int16"),
639
+ np.array([1, 2, 3], dtype="int32"),
640
+ np.array([1, 2, 3], dtype="int64"),
641
+ np.array([1, 2, 3], dtype="float32"),
642
+ np.array([1, 2, 3], dtype="float64"),
643
+ ]
644
+ expected_fields = ["bool", "int16", "int32", "int64", "float32", "float64"]
645
+ expected_meta = {
646
+ "geometry_type": "Point",
647
+ "crs": "EPSG:4326",
648
+ "fields": expected_fields,
649
+ }
650
+
651
+ filename = tmp_path / "test.gdb"
652
+
653
+ # int64 is not supported without additional config: https://gdal.org/en/latest/drivers/vector/openfilegdb.html#bit-integer-field-support
654
+ # it is converted to float64 by default and raises a warning
655
+ # (for GDAL >= 3.9.0 only)
656
+ write_params = (
657
+ {"TARGET_ARCGIS_VERSION": "ARCGIS_PRO_3_2_OR_LATER"} if write_int64 else {}
658
+ )
659
+
660
+ if write_int64 or __gdal_version__ < (3, 9, 0):
661
+ ctx = contextlib.nullcontext()
662
+ else:
663
+ ctx = pytest.warns(
664
+ RuntimeWarning, match="Integer64 will be written as a Float64"
665
+ )
666
+
667
+ with ctx:
668
+ write(
669
+ filename,
670
+ expected_geometry,
671
+ expected_field_data,
672
+ driver="OpenFileGDB",
673
+ **expected_meta,
674
+ **write_params,
675
+ )
676
+
677
+ meta, _, geometry, field_data = read(filename)
678
+
679
+ if not write_int64:
680
+ expected_field_data[3] = expected_field_data[3].astype("float64")
681
+
682
+ # bool types are converted to int32
683
+ expected_field_data[0] = expected_field_data[0].astype("int32")
684
+
685
+ assert meta["crs"] == expected_meta["crs"]
686
+ assert np.array_equal(meta["fields"], expected_meta["fields"])
687
+
688
+ assert np.array_equal(geometry, expected_geometry)
689
+ for i in range(len(expected_field_data)):
690
+ assert field_data[i].dtype == expected_field_data[i].dtype
691
+ assert np.array_equal(field_data[i], expected_field_data[i])
692
+
693
+
694
+ @pytest.mark.parametrize("ext", DRIVERS)
695
+ def test_write_append(tmp_path, naturalearth_lowres, ext):
696
+ if ext == ".gpkg.zip":
697
+ pytest.skip("Append to .gpkg.zip is not supported")
698
+
699
+ meta, _, geometry, field_data = read(naturalearth_lowres)
700
+
701
+ # coerce output layer to MultiPolygon to avoid mixed type errors
702
+ meta["geometry_type"] = "MultiPolygon"
703
+
704
+ filename = tmp_path / f"test{ext}"
705
+ write(filename, geometry, field_data, **meta)
706
+
707
+ assert filename.exists()
708
+
709
+ assert read_info(filename)["features"] == 177
710
+
711
+ # write the same records again
712
+ write(filename, geometry, field_data, append=True, **meta)
713
+
714
+ assert read_info(filename)["features"] == 354
715
+
716
+
717
+ @pytest.mark.parametrize("driver,ext", [("GML", ".gml")])
718
+ def test_write_append_unsupported(tmp_path, naturalearth_lowres, driver, ext):
719
+ meta, _, geometry, field_data = read(naturalearth_lowres)
720
+
721
+ # GML does not support append functionality
722
+ filename = tmp_path / f"test{ext}"
723
+ write(filename, geometry, field_data, driver=driver, **meta)
724
+
725
+ assert filename.exists()
726
+
727
+ assert read_info(filename, force_feature_count=True)["features"] == 177
728
+
729
+ with pytest.raises(DataSourceError):
730
+ write(filename, geometry, field_data, driver=driver, append=True, **meta)
731
+
732
+
733
+ @pytest.mark.parametrize(
734
+ "driver",
735
+ {
736
+ driver
737
+ for driver in DRIVERS.values()
738
+ if driver not in ("ESRI Shapefile", "GPKG", "GeoJSON")
739
+ },
740
+ )
741
+ def test_write_supported(tmp_path, naturalearth_lowres, driver):
742
+ """Test drivers known to work that are not specifically tested above"""
743
+ meta, _, geometry, field_data = read(naturalearth_lowres, columns=["iso_a3"])
744
+
745
+ # note: naturalearth_lowres contains mixed polygons / multipolygons, which
746
+ # are not supported in mixed form for all drivers. To get around this here
747
+ # we take the first record only.
748
+ meta["geometry_type"] = "MultiPolygon"
749
+
750
+ filename = tmp_path / f"test{DRIVER_EXT[driver]}"
751
+ write(
752
+ filename,
753
+ geometry[:1],
754
+ field_data=[f[:1] for f in field_data],
755
+ driver=driver,
756
+ **meta,
757
+ )
758
+
759
+ assert filename.exists()
760
+
761
+
762
+ def test_write_unsupported(tmp_path, naturalearth_lowres):
763
+ """Test writing using a driver that does not support writing."""
764
+ meta, _, geometry, field_data = read(naturalearth_lowres)
765
+
766
+ filename = tmp_path / "test.topojson"
767
+
768
+ with pytest.raises(DataSourceError, match="does not support write functionality"):
769
+ write(filename, geometry, field_data, driver="TopoJSON", **meta)
770
+
771
+
772
+ def test_write_gdalclose_error(naturalearth_lowres):
773
+ meta, _, geometry, field_data = read(naturalearth_lowres)
774
+
775
+ filename = "s3://non-existing-bucket/test.geojson"
776
+
777
+ # set config options to avoid errors on open due to GDAL S3 configuration
778
+ set_gdal_config_options(
779
+ {
780
+ "AWS_ACCESS_KEY_ID": "invalid",
781
+ "AWS_SECRET_ACCESS_KEY": "invalid",
782
+ "AWS_NO_SIGN_REQUEST": True,
783
+ }
784
+ )
785
+
786
+ with pytest.raises(DataSourceError, match="Failed to write features to dataset"):
787
+ write(filename, geometry, field_data, **meta)
788
+
789
+
790
+ def assert_equal_result(result1, result2):
791
+ meta1, index1, geometry1, field_data1 = result1
792
+ meta2, index2, geometry2, field_data2 = result2
793
+
794
+ assert np.array_equal(meta1["fields"], meta2["fields"])
795
+ assert np.array_equal(index1, index2)
796
+ assert all(np.array_equal(f1, f2) for f1, f2 in zip(field_data1, field_data2))
797
+
798
+ if HAS_SHAPELY:
799
+ # a plain `assert np.array_equal(geometry1, geometry2)` doesn't work
800
+ # because the WKB values are not exactly equal, therefore parsing with
801
+ # shapely to compare with tolerance
802
+ assert shapely.equals_exact(
803
+ shapely.from_wkb(geometry1), shapely.from_wkb(geometry2), tolerance=0.00001
804
+ ).all()
805
+
806
+
807
+ @pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning") # TODO
808
+ @pytest.mark.parametrize("driver,ext", [("GeoJSON", "geojson"), ("GPKG", "gpkg")])
809
+ def test_read_from_bytes(tmp_path, naturalearth_lowres, driver, ext):
810
+ meta, index, geometry, field_data = read(naturalearth_lowres)
811
+ meta.update({"geometry_type": "Unknown"})
812
+ filename = tmp_path / f"test.{ext}"
813
+ write(filename, geometry, field_data, driver=driver, **meta)
814
+
815
+ with open(filename, "rb") as f:
816
+ buffer = f.read()
817
+
818
+ result2 = read(buffer)
819
+ assert_equal_result((meta, index, geometry, field_data), result2)
820
+
821
+
822
+ def test_read_from_bytes_zipped(naturalearth_lowres_vsi):
823
+ path, vsi_path = naturalearth_lowres_vsi
824
+ meta, index, geometry, field_data = read(vsi_path)
825
+
826
+ with open(path, "rb") as f:
827
+ buffer = f.read()
828
+
829
+ result2 = read(buffer)
830
+ assert_equal_result((meta, index, geometry, field_data), result2)
831
+
832
+
833
+ @pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning") # TODO
834
+ @pytest.mark.parametrize("driver,ext", [("GeoJSON", "geojson"), ("GPKG", "gpkg")])
835
+ def test_read_from_file_like(tmp_path, naturalearth_lowres, driver, ext):
836
+ meta, index, geometry, field_data = read(naturalearth_lowres)
837
+ meta.update({"geometry_type": "Unknown"})
838
+ filename = tmp_path / f"test.{ext}"
839
+ write(filename, geometry, field_data, driver=driver, **meta)
840
+
841
+ with open(filename, "rb") as f:
842
+ result2 = read(f)
843
+
844
+ assert_equal_result((meta, index, geometry, field_data), result2)
845
+
846
+
847
+ def test_read_from_nonseekable_bytes(nonseekable_bytes):
848
+ meta, _, geometry, _ = read(nonseekable_bytes)
849
+ assert meta["fields"].shape == (0,)
850
+ assert len(geometry) == 1
851
+
852
+
853
+ @pytest.mark.parametrize("ext", ["gpkg", "fgb"])
854
+ def test_read_write_data_types_numeric(tmp_path, ext):
855
+ # Point(0, 0)
856
+ geometry = np.array(
857
+ [bytes.fromhex("010100000000000000000000000000000000000000")] * 3, dtype=object
858
+ )
859
+ field_data = [
860
+ np.array([True, False, True], dtype="bool"),
861
+ np.array([1, 2, 3], dtype="int16"),
862
+ np.array([1, 2, 3], dtype="int32"),
863
+ np.array([1, 2, 3], dtype="int64"),
864
+ np.array([1, 2, 3], dtype="float32"),
865
+ np.array([1, 2, 3], dtype="float64"),
866
+ ]
867
+ fields = ["bool", "int16", "int32", "int64", "float32", "float64"]
868
+ meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}
869
+
870
+ filename = tmp_path / f"test.{ext}"
871
+ write(filename, geometry, field_data, fields, **meta)
872
+ result = read(filename)[3]
873
+ assert all(np.array_equal(f1, f2) for f1, f2 in zip(result, field_data))
874
+ assert all(f1.dtype == f2.dtype for f1, f2 in zip(result, field_data))
875
+
876
+ # other integer data types that don't roundtrip exactly
877
+ # these are generally promoted to a larger integer type except for uint64
878
+ for i, (dtype, result_dtype) in enumerate(
879
+ [
880
+ ("int8", "int16"),
881
+ ("uint8", "int16"),
882
+ ("uint16", "int32"),
883
+ ("uint32", "int64"),
884
+ ("uint64", "int64"),
885
+ ]
886
+ ):
887
+ field_data = [np.array([1, 2, 3], dtype=dtype)]
888
+ filename = tmp_path / f"test{i}.{ext}"
889
+ write(filename, geometry, field_data, ["col"], **meta)
890
+ result = read(filename)[3][0]
891
+ assert np.array_equal(result, np.array([1, 2, 3]))
892
+ assert result.dtype == result_dtype
893
+
894
+
895
+ def test_read_write_datetime(tmp_path):
896
+ field_data = [
897
+ np.array(["2005-02-01", "2005-02-02"], dtype="datetime64[D]"),
898
+ np.array(["2001-01-01T12:00", "2002-02-03T13:56:03"], dtype="datetime64[s]"),
899
+ np.array(
900
+ ["2001-01-01T12:00", "2002-02-03T13:56:03.072"], dtype="datetime64[ms]"
901
+ ),
902
+ np.array(
903
+ ["2001-01-01T12:00", "2002-02-03T13:56:03.072"], dtype="datetime64[ns]"
904
+ ),
905
+ np.array(
906
+ ["2001-01-01T12:00", "2002-02-03T13:56:03.072123456"],
907
+ dtype="datetime64[ns]",
908
+ ),
909
+ # Remark: a None value is automatically converted to np.datetime64("NaT")
910
+ np.array([np.datetime64("NaT"), None], dtype="datetime64[ms]"),
911
+ ]
912
+ fields = [
913
+ "datetime64_d",
914
+ "datetime64_s",
915
+ "datetime64_ms",
916
+ "datetime64_ns",
917
+ "datetime64_precise_ns",
918
+ "datetime64_ms_nat",
919
+ ]
920
+
921
+ # Point(0, 0)
922
+ geometry = np.array(
923
+ [bytes.fromhex("010100000000000000000000000000000000000000")] * 2, dtype=object
924
+ )
925
+ meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}
926
+
927
+ filename = tmp_path / "test.gpkg"
928
+ write(filename, geometry, field_data, fields, **meta)
929
+ result = read(filename)[3]
930
+ for idx, field in enumerate(fields):
931
+ if field == "datetime64_precise_ns":
932
+ # gdal rounds datetimes to ms
933
+ assert np.array_equal(result[idx], field_data[idx].astype("datetime64[ms]"))
934
+ else:
935
+ assert np.array_equal(result[idx], field_data[idx], equal_nan=True)
936
+
937
+
938
+ @pytest.mark.parametrize("ext", ["gpkg", "fgb"])
939
+ def test_read_write_int64_large(tmp_path, ext):
940
+ # Test if value > max int32 is correctly written and read.
941
+ # Test introduced to validate https://github.com/geopandas/pyogrio/issues/259
942
+ # Point(0, 0)
943
+ geometry = np.array(
944
+ [bytes.fromhex("010100000000000000000000000000000000000000")] * 3, dtype=object
945
+ )
946
+ field_data = [np.array([1, 2192502720, -5], dtype="int64")]
947
+ fields = ["overflow_int64"]
948
+ meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}
949
+
950
+ filename = tmp_path / f"test.{ext}"
951
+ write(filename, geometry, field_data, fields, **meta)
952
+ result = read(filename)[3]
953
+ assert np.array_equal(result, field_data)
954
+ assert result[0].dtype == field_data[0].dtype
955
+
956
+
957
+ def test_read_data_types_numeric_with_null(test_gpkg_nulls):
958
+ fields = read(test_gpkg_nulls)[3]
959
+
960
+ for i, field in enumerate(fields):
961
+ # last value should be np.nan
962
+ assert np.isnan(field[-1])
963
+
964
+ # all integer fields should be cast to float64; float32 should be preserved
965
+ if i == 9:
966
+ assert field.dtype == "float32"
967
+ else:
968
+ assert field.dtype == "float64"
969
+
970
+
971
+ def test_read_datetime_millisecond(datetime_file):
972
+ field = read(datetime_file)[3][0]
973
+ assert field.dtype == "datetime64[ms]"
974
+ assert field[0] == np.datetime64("2020-01-01 09:00:00.123")
975
+ assert field[1] == np.datetime64("2020-01-01 10:00:00.000")
976
+
977
+
978
+ def test_read_unsupported_ext(tmp_path):
979
+ test_unsupported_path = tmp_path / "test.unsupported"
980
+ with open(test_unsupported_path, "w") as file:
981
+ file.write("column1,column2\n")
982
+ file.write("data1,data2")
983
+
984
+ with pytest.raises(
985
+ DataSourceError, match=".* by prefixing the file path with '<DRIVER>:'.*"
986
+ ):
987
+ read(test_unsupported_path)
988
+
989
+
990
+ def test_read_unsupported_ext_with_prefix(tmp_path):
991
+ test_unsupported_path = tmp_path / "test.unsupported"
992
+ with open(test_unsupported_path, "w") as file:
993
+ file.write("column1,column2\n")
994
+ file.write("data1,data2")
995
+
996
+ _, _, _, field_data = read(f"CSV:{test_unsupported_path}")
997
+ assert len(field_data) == 2
998
+ assert field_data[0] == "data1"
999
+
1000
+
1001
+ def test_read_datetime_as_string(datetime_tz_file):
1002
+ field = read(datetime_tz_file)[3][0]
1003
+ assert field.dtype == "datetime64[ms]"
1004
+ # time zone is ignored in numpy layer
1005
+ assert field[0] == np.datetime64("2020-01-01 09:00:00.123")
1006
+ assert field[1] == np.datetime64("2020-01-01 10:00:00.000")
1007
+
1008
+ field = read(datetime_tz_file, datetime_as_string=True)[3][0]
1009
+ assert field.dtype == "object"
1010
+
1011
+ if __gdal_version__ < (3, 7, 0):
1012
+ # With GDAL < 3.7, datetimes are not returned as ISO8601 strings
1013
+ assert field[0] == "2020/01/01 09:00:00.123-05"
1014
+ assert field[1] == "2020/01/01 10:00:00-05"
1015
+ else:
1016
+ assert field[0] == "2020-01-01T09:00:00.123-05:00"
1017
+ assert field[1] == "2020-01-01T10:00:00-05:00"
1018
+
1019
+
1020
+ @pytest.mark.parametrize("ext", ["gpkg", "geojson"])
1021
+ def test_read_write_null_geometry(tmp_path, ext):
1022
+ # Point(0, 0), null
1023
+ geometry = np.array(
1024
+ [bytes.fromhex("010100000000000000000000000000000000000000"), None],
1025
+ dtype=object,
1026
+ )
1027
+ field_data = [np.array([1, 2], dtype="int32")]
1028
+ fields = ["col"]
1029
+ meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
1030
+ if ext == "gpkg":
1031
+ meta["spatial_index"] = False
1032
+
1033
+ filename = tmp_path / f"test.{ext}"
1034
+ write(filename, geometry, field_data, fields, **meta)
1035
+ result_geometry, result_fields = read(filename)[2:]
1036
+ assert np.array_equal(result_geometry, geometry)
1037
+ assert np.array_equal(result_fields[0], field_data[0])
1038
+
1039
+
1040
+ @pytest.mark.parametrize("dtype", ["float32", "float64"])
1041
+ def test_write_float_nan_null(tmp_path, dtype):
1042
+ # Point(0, 0)
1043
+ geometry = np.array(
1044
+ [bytes.fromhex("010100000000000000000000000000000000000000")] * 2,
1045
+ dtype=object,
1046
+ )
1047
+ field_data = [np.array([1.5, np.nan], dtype=dtype)]
1048
+ fields = ["col"]
1049
+ meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
1050
+ filename = tmp_path / "test.geojson"
1051
+
1052
+ # default nan_as_null=True
1053
+ write(filename, geometry, field_data, fields, **meta)
1054
+ with open(filename) as f:
1055
+ content = f.read()
1056
+ assert '{ "col": null }' in content
1057
+
1058
+ # set to False
1059
+ # by default, GDAL will skip the property for GeoJSON if the value is NaN
1060
+ if dtype == "float32":
1061
+ ctx = pytest.warns(RuntimeWarning, match="NaN of Infinity value found. Skipped")
1062
+ else:
1063
+ ctx = contextlib.nullcontext()
1064
+ with ctx:
1065
+ write(filename, geometry, field_data, fields, **meta, nan_as_null=False)
1066
+ with open(filename) as f:
1067
+ content = f.read()
1068
+ assert '"properties": { }' in content
1069
+
1070
+ # but can instruct GDAL to write NaN to json
1071
+ write(
1072
+ filename,
1073
+ geometry,
1074
+ field_data,
1075
+ fields,
1076
+ **meta,
1077
+ nan_as_null=False,
1078
+ WRITE_NON_FINITE_VALUES="YES",
1079
+ )
1080
+ with open(filename) as f:
1081
+ content = f.read()
1082
+ assert '{ "col": NaN }' in content
1083
+
1084
+
1085
+ @requires_pyarrow_api
1086
+ @pytest.mark.skipif(
1087
+ "Arrow" not in list_drivers(), reason="Arrow driver is not available"
1088
+ )
1089
+ def test_write_float_nan_null_arrow(tmp_path):
1090
+ import pyarrow.feather
1091
+
1092
+ # Point(0, 0)
1093
+ geometry = np.array(
1094
+ [bytes.fromhex("010100000000000000000000000000000000000000")] * 2,
1095
+ dtype=object,
1096
+ )
1097
+ field_data = [np.array([1.5, np.nan], dtype="float64")]
1098
+ fields = ["col"]
1099
+ meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
1100
+ fname = tmp_path / "test.arrow"
1101
+
1102
+ # default nan_as_null=True
1103
+ write(fname, geometry, field_data, fields, driver="Arrow", **meta)
1104
+ table = pyarrow.feather.read_table(fname)
1105
+ assert table["col"].is_null().to_pylist() == [False, True]
1106
+
1107
+ # set to False
1108
+ write(
1109
+ fname, geometry, field_data, fields, driver="Arrow", nan_as_null=False, **meta
1110
+ )
1111
+ table = pyarrow.feather.read_table(fname)
1112
+ assert table["col"].is_null().to_pylist() == [False, False]
1113
+ pc = pytest.importorskip("pyarrow.compute")
1114
+ assert pc.is_nan(table["col"]).to_pylist() == [False, True]
1115
+
1116
+
1117
+ @pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
1118
+ @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
1119
+ def test_write_memory(naturalearth_lowres, driver):
1120
+ meta, _, geometry, field_data = read(naturalearth_lowres)
1121
+ meta.update({"geometry_type": "MultiPolygon"})
1122
+
1123
+ buffer = BytesIO()
1124
+ write(buffer, geometry, field_data, driver=driver, layer="test", **meta)
1125
+
1126
+ assert len(buffer.getbuffer()) > 0
1127
+ assert list_layers(buffer)[0][0] == "test"
1128
+
1129
+ actual_meta, _, actual_geometry, actual_field_data = read(buffer)
1130
+
1131
+ assert np.array_equal(actual_meta["fields"], meta["fields"])
1132
+ assert np.array_equal(actual_field_data, field_data)
1133
+ assert len(actual_geometry) == len(geometry)
1134
+
1135
+
1136
+ def test_write_memory_driver_required(naturalearth_lowres):
1137
+ meta, _, geometry, field_data = read(naturalearth_lowres)
1138
+
1139
+ buffer = BytesIO()
1140
+ with pytest.raises(
1141
+ ValueError,
1142
+ match="driver must be provided to write to in-memory file",
1143
+ ):
1144
+ write(buffer, geometry, field_data, driver=None, layer="test", **meta)
1145
+
1146
+
1147
+ @pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
1148
+ def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
1149
+ meta, _, geometry, field_data = read(naturalearth_lowres)
1150
+
1151
+ buffer = BytesIO()
1152
+
1153
+ with pytest.raises(
1154
+ ValueError, match=f"writing to in-memory file is not supported for {driver}"
1155
+ ):
1156
+ write(
1157
+ buffer,
1158
+ geometry,
1159
+ field_data,
1160
+ driver=driver,
1161
+ layer="test",
1162
+ append=True,
1163
+ **meta,
1164
+ )
1165
+
1166
+
1167
+ @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
1168
+ def test_write_memory_append_unsupported(naturalearth_lowres, driver):
1169
+ meta, _, geometry, field_data = read(naturalearth_lowres)
1170
+ meta.update({"geometry_type": "MultiPolygon"})
1171
+
1172
+ buffer = BytesIO()
1173
+
1174
+ with pytest.raises(
1175
+ NotImplementedError, match="append is not supported for in-memory files"
1176
+ ):
1177
+ write(
1178
+ buffer,
1179
+ geometry,
1180
+ field_data,
1181
+ driver=driver,
1182
+ layer="test",
1183
+ append=True,
1184
+ **meta,
1185
+ )
1186
+
1187
+
1188
+ def test_write_memory_existing_unsupported(naturalearth_lowres):
1189
+ meta, _, geometry, field_data = read(naturalearth_lowres)
1190
+
1191
+ buffer = BytesIO(b"0000")
1192
+ with pytest.raises(
1193
+ NotImplementedError,
1194
+ match="writing to existing in-memory object is not supported",
1195
+ ):
1196
+ write(buffer, geometry, field_data, driver="GeoJSON", layer="test", **meta)
1197
+
1198
+
1199
+ def test_write_open_file_handle(tmp_path, naturalearth_lowres):
1200
+ """Verify that writing to an open file handle is not currently supported"""
1201
+
1202
+ meta, _, geometry, field_data = read(naturalearth_lowres)
1203
+
1204
+ # verify it fails for regular file handle
1205
+ with pytest.raises(
1206
+ NotImplementedError, match="writing to an open file handle is not yet supported"
1207
+ ):
1208
+ with open(tmp_path / "test.geojson", "wb") as f:
1209
+ write(f, geometry, field_data, driver="GeoJSON", layer="test", **meta)
1210
+
1211
+ # verify it fails for ZipFile
1212
+ with pytest.raises(
1213
+ NotImplementedError, match="writing to an open file handle is not yet supported"
1214
+ ):
1215
+ with ZipFile(tmp_path / "test.geojson.zip", "w") as z:
1216
+ with z.open("test.geojson", "w") as f:
1217
+ write(f, geometry, field_data, driver="GeoJSON", layer="test", **meta)
1218
+
1219
+
1220
+ @pytest.mark.parametrize("ext", ["fgb", "gpkg", "geojson"])
1221
+ @pytest.mark.parametrize(
1222
+ "read_encoding,write_encoding",
1223
+ [
1224
+ pytest.param(
1225
+ None,
1226
+ None,
1227
+ marks=pytest.mark.skipif(
1228
+ sys.platform == "win32", reason="must specify write encoding on Windows"
1229
+ ),
1230
+ ),
1231
+ pytest.param(
1232
+ "UTF-8",
1233
+ None,
1234
+ marks=pytest.mark.skipif(
1235
+ sys.platform == "win32", reason="must specify write encoding on Windows"
1236
+ ),
1237
+ ),
1238
+ (None, "UTF-8"),
1239
+ ("UTF-8", "UTF-8"),
1240
+ ],
1241
+ )
1242
+ def test_encoding_io(tmp_path, ext, read_encoding, write_encoding):
1243
+ # Point(0, 0)
1244
+ geometry = np.array(
1245
+ [bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
1246
+ )
1247
+ arabic = "العربية"
1248
+ cree = "ᓀᐦᐃᔭᐍᐏᐣ"
1249
+ mandarin = "中文"
1250
+ field_data = [
1251
+ np.array([arabic], dtype=object),
1252
+ np.array([cree], dtype=object),
1253
+ np.array([mandarin], dtype=object),
1254
+ ]
1255
+ fields = [arabic, cree, mandarin]
1256
+ meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": write_encoding}
1257
+
1258
+ filename = tmp_path / f"test.{ext}"
1259
+ write(filename, geometry, field_data, fields, **meta)
1260
+
1261
+ actual_meta, _, _, actual_field_data = read(filename, encoding=read_encoding)
1262
+ assert np.array_equal(fields, actual_meta["fields"])
1263
+ assert np.array_equal(field_data, actual_field_data)
1264
+ assert np.array_equal(fields, read_info(filename, encoding=read_encoding)["fields"])
1265
+
1266
+
+ @pytest.mark.parametrize(
+     "read_encoding,write_encoding",
+     [
+         pytest.param(
+             None,
+             None,
+             marks=pytest.mark.skipif(
+                 sys.platform == "win32", reason="must specify write encoding on Windows"
+             ),
+         ),
+         pytest.param(
+             "UTF-8",
+             None,
+             marks=pytest.mark.skipif(
+                 sys.platform == "win32", reason="must specify write encoding on Windows"
+             ),
+         ),
+         (None, "UTF-8"),
+         ("UTF-8", "UTF-8"),
+     ],
+ )
+ def test_encoding_io_shapefile(tmp_path, read_encoding, write_encoding):
+     # Point(0, 0)
+     geometry = np.array(
+         [bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
+     )
+     arabic = "العربية"
+     cree = "ᓀᐦᐃᔭᐍᐏᐣ"
+     mandarin = "中文"
+     field_data = [
+         np.array([arabic], dtype=object),
+         np.array([cree], dtype=object),
+         np.array([mandarin], dtype=object),
+     ]
+
+     # GDAL truncates field names longer than 10 bytes at the byte level (not at
+     # UTF-8 character boundaries) when writing to a shapefile, so we truncate
+     # them ourselves before writing (see the byte-safe truncation sketch after
+     # this test)
+     fields = [arabic[:5], cree[:3], mandarin]
+     meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": write_encoding}
+
+     filename = tmp_path / "test.shp"
+     # NOTE: GDAL automatically creates a cpg file with the encoding name, which
+     # means that if we read this without specifying the encoding it uses the
+     # correct one
+     write(filename, geometry, field_data, fields, **meta)
+
+     actual_meta, _, _, actual_field_data = read(filename, encoding=read_encoding)
+     assert np.array_equal(fields, actual_meta["fields"])
+     assert np.array_equal(field_data, actual_field_data)
+     assert np.array_equal(fields, read_info(filename, encoding=read_encoding)["fields"])
+
+     # verify that if the cpg file is not present, the user-provided encoding is
+     # used; otherwise reading defaults to ISO-8859-1
+     if read_encoding is not None:
+         filename.with_suffix(".cpg").unlink()
+         actual_meta, _, _, actual_field_data = read(filename, encoding=read_encoding)
+         assert np.array_equal(fields, actual_meta["fields"])
+         assert np.array_equal(field_data, actual_field_data)
+         assert np.array_equal(
+             fields, read_info(filename, encoding=read_encoding)["fields"]
+         )
+
+
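A byte-safe truncation helper avoids GDAL's raw 10-byte cut. This is a minimal sketch (`truncate_utf8` is a hypothetical helper, not part of pyogrio):

```python
def truncate_utf8(name: str, max_bytes: int = 10) -> str:
    """Truncate name so its UTF-8 encoding fits in max_bytes without
    splitting a multi-byte character."""
    encoded = name.encode("utf-8")
    if len(encoded) <= max_bytes:
        return name
    # errors="ignore" silently drops a trailing partial character
    return encoded[:max_bytes].decode("utf-8", errors="ignore")

# each of these characters takes 3 bytes in UTF-8, so only 3 fit in 10 bytes
assert truncate_utf8("中文字段名称") == "中文字"
```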
+ @pytest.mark.parametrize("ext", ["gpkg", "geojson"])
+ def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
+     """Verify that we can write non-UTF-8 data to the data source
+
+     IMPORTANT: this may not be valid for the data source and will likely render
+     the output unusable in other tools, but it should round-trip successfully
+     unless we disable writing with other encodings.
+
+     NOTE: the FlatGeobuf driver cannot handle non-UTF-8 data in GDAL >= 3.9
+     """
+     encoding, text = encoded_text
+
+     # Point(0, 0)
+     geometry = np.array(
+         [bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
+     )
+
+     field_data = [np.array([text], dtype=object)]
+
+     fields = [text]
+     meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": encoding}
+
+     filename = tmp_path / f"test.{ext}"
+     write(filename, geometry, field_data, fields, **meta)
+
+     # these files cannot be opened without specifying the encoding
+     with pytest.raises(UnicodeDecodeError):
+         read(filename)
+
+     with pytest.raises(UnicodeDecodeError):
+         read_info(filename)
+
+     # the encoding must be provided to read them properly
+     actual_meta, _, _, actual_field_data = read(filename, encoding=encoding)
+     assert actual_meta["fields"][0] == text
+     assert actual_field_data[0] == text
+     assert read_info(filename, encoding=encoding)["fields"][0] == text
+
+
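Because reading such a file without the right encoding raises `UnicodeDecodeError`, a caller that knows the likely legacy encoding can retry explicitly. A minimal sketch (the path and the CP936 fallback are assumptions):

```python
from pyogrio.raw import read

def read_with_fallback(path, fallback_encoding="CP936"):
    # try the driver default (typically UTF-8) first, then retry with the
    # known legacy encoding
    try:
        return read(path)
    except UnicodeDecodeError:
        return read(path, encoding=fallback_encoding)

meta, fids, geometry, field_data = read_with_fallback("legacy.gpkg")
```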
+ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text):
+     encoding, text = encoded_text
+
+     # Point(0, 0)
+     geometry = np.array(
+         [bytes.fromhex("010100000000000000000000000000000000000000")], dtype=object
+     )
+
+     field_data = [np.array([text], dtype=object)]
+
+     fields = [text]
+     meta = {"geometry_type": "Point", "crs": "EPSG:4326", "encoding": encoding}
+
+     filename = tmp_path / "test.shp"
+     write(filename, geometry, field_data, fields, **meta)
+
+     # NOTE: GDAL automatically creates a cpg file with the encoding name, which
+     # means that if we read this without specifying the encoding it uses the
+     # correct one
+     actual_meta, _, _, actual_field_data = read(filename)
+     assert actual_meta["fields"][0] == text
+     assert actual_field_data[0] == text
+     assert read_info(filename)["fields"][0] == text
+
+     # verify that if the cpg file is not present, the user-provided encoding
+     # must be used
+     filename.with_suffix(".cpg").unlink()
+
+     # without the cpg file, GDAL assumes ISO-8859-1, which is wrong here
+     miscoded = text.encode(encoding).decode("ISO-8859-1")
+     bad_meta, _, _, bad_field_data = read(filename)
+     assert bad_meta["fields"][0] == miscoded
+     assert bad_field_data[0] == miscoded
+     assert read_info(filename)["fields"][0] == miscoded
+
+     # if the encoding is provided, it should yield the correct text
+     actual_meta, _, _, actual_field_data = read(filename, encoding=encoding)
+     assert actual_meta["fields"][0] == text
+     assert actual_field_data[0] == text
+     assert read_info(filename, encoding=encoding)["fields"][0] == text
+
+     # verify that setting the encoding does not corrupt the SHAPE_ENCODING
+     # option if set globally (it is ignored during read when the encoding is
+     # specified by the user)
+     try:
+         set_gdal_config_options({"SHAPE_ENCODING": "CP1254"})
+         _ = read(filename, encoding=encoding)
+         assert get_gdal_config_option("SHAPE_ENCODING") == "CP1254"
+
+     finally:
+         # reset to clear between tests
+         set_gdal_config_options({"SHAPE_ENCODING": None})
+
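GDAL discovers the code page through the .cpg sidecar, which is a plain text file containing only the encoding name; if one goes missing it can be recreated by hand. A minimal sketch (the path and encoding name are assumptions):

```python
from pathlib import Path

shp = Path("legacy.shp")  # hypothetical shapefile whose text is CP1254
# restore the sidecar so readers pick up the correct code page automatically
shp.with_suffix(".cpg").write_text("CP1254")
```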
+
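The try/finally pattern above generalizes to a small context manager that sets a GDAL config option temporarily via pyogrio's public helpers. A sketch (`temporary_gdal_config` is a hypothetical name, not a pyogrio API):

```python
from contextlib import contextmanager

from pyogrio import get_gdal_config_option, set_gdal_config_options

@contextmanager
def temporary_gdal_config(name, value):
    previous = get_gdal_config_option(name)
    set_gdal_config_options({name: value})
    try:
        yield
    finally:
        # restore the prior value; None clears the option entirely
        set_gdal_config_options({name: previous})

# usage: SHAPE_ENCODING is reset even if the body raises
with temporary_gdal_config("SHAPE_ENCODING", "CP1254"):
    ...
```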
+ def test_write_with_mask(tmp_path):
+     # Point(0, 0), repeated 3 times
+     geometry = np.array(
+         [bytes.fromhex("010100000000000000000000000000000000000000")] * 3,
+         dtype=object,
+     )
+     field_data = [np.array([1, 2, 3], dtype="int32")]
+     field_mask = [np.array([False, True, False])]
+     fields = ["col"]
+     meta = {"geometry_type": "Point", "crs": "EPSG:4326"}
+
+     filename = tmp_path / "test.geojson"
+     write(filename, geometry, field_data, fields, field_mask, **meta)
+     result_geometry, result_fields = read(filename)[2:]
+     assert np.array_equal(result_geometry, geometry)
+     np.testing.assert_allclose(result_fields[0], np.array([1, np.nan, 3]))
+
+     # mask with the wrong length
+     field_mask = [np.array([False, True])]
+     with pytest.raises(ValueError):
+         write(filename, geometry, field_data, fields, field_mask, **meta)
+
+     # wrong number of mask arrays
+     field_mask = [np.array([False, True, False])] * 2
+     with pytest.raises(ValueError):
+         write(filename, geometry, field_data, fields, field_mask, **meta)
+
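The mask arrays mark nulls positionally (`True` means the value is written as null), so they pair naturally with NumPy masked arrays. A minimal sketch (variable names are illustrative):

```python
import numpy as np

values = np.ma.masked_invalid(np.array([1.0, np.nan, 3.0]))
field_data = [values.filled(0.0)]  # placeholder for masked slots
field_mask = [np.ma.getmaskarray(values)]  # True where the output should be null
assert field_mask[0].tolist() == [False, True, False]
```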
+
+ def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres):
+     # this test is included here instead of test_arrow.py to ensure we also run
+     # it when pyarrow is not installed
+
+     with open_arrow(naturalearth_lowres) as (meta, reader):
+         assert isinstance(meta, dict)
+         assert isinstance(reader, pyogrio._io._ArrowStream)
+         capsule = reader.__arrow_c_stream__()
+         assert (
+             ctypes.pythonapi.PyCapsule_IsValid(
+                 ctypes.py_object(capsule), b"arrow_array_stream"
+             )
+             == 1
+         )
+
+
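Any consumer of the Arrow PyCapsule interface can import this stream without pyogrio itself depending on it. A sketch using pyarrow >= 14 (assuming pyarrow is installed; the input path is hypothetical):

```python
import pyarrow as pa

from pyogrio.raw import open_arrow

with open_arrow("naturalearth_lowres.shp") as (meta, reader):
    # from_stream accepts any object implementing __arrow_c_stream__
    table = pa.RecordBatchReader.from_stream(reader).read_all()

print(table.num_rows, meta["crs"])
```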
+ @pytest.mark.skipif(HAS_PYARROW, reason="pyarrow is installed")
+ def test_open_arrow_error_no_pyarrow(naturalearth_lowres):
+     # this test is included here instead of test_arrow.py to ensure we run
+     # it when pyarrow is not installed
+
+     with pytest.raises(ImportError):
+         with open_arrow(naturalearth_lowres, use_pyarrow=True) as _:
+             pass