pyogrio 0.10.0__cp313-cp313-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyogrio might be problematic. Click here for more details.

Files changed (223):
  1. pyogrio/.dylibs/libgdal.35.3.9.1.dylib +0 -0
  2. pyogrio/__init__.py +55 -0
  3. pyogrio/_compat.py +47 -0
  4. pyogrio/_env.py +59 -0
  5. pyogrio/_err.cpython-313-darwin.so +0 -0
  6. pyogrio/_geometry.cpython-313-darwin.so +0 -0
  7. pyogrio/_io.cpython-313-darwin.so +0 -0
  8. pyogrio/_ogr.cpython-313-darwin.so +0 -0
  9. pyogrio/_version.py +21 -0
  10. pyogrio/_vsi.cpython-313-darwin.so +0 -0
  11. pyogrio/core.py +386 -0
  12. pyogrio/errors.py +25 -0
  13. pyogrio/gdal_data/GDAL-targets-release.cmake +19 -0
  14. pyogrio/gdal_data/GDAL-targets.cmake +105 -0
  15. pyogrio/gdal_data/GDALConfig.cmake +24 -0
  16. pyogrio/gdal_data/GDALConfigVersion.cmake +85 -0
  17. pyogrio/gdal_data/GDALLogoBW.svg +138 -0
  18. pyogrio/gdal_data/GDALLogoColor.svg +126 -0
  19. pyogrio/gdal_data/GDALLogoGS.svg +126 -0
  20. pyogrio/gdal_data/LICENSE.TXT +467 -0
  21. pyogrio/gdal_data/MM_m_idofic.csv +321 -0
  22. pyogrio/gdal_data/copyright +467 -0
  23. pyogrio/gdal_data/cubewerx_extra.wkt +48 -0
  24. pyogrio/gdal_data/default.rsc +0 -0
  25. pyogrio/gdal_data/ecw_cs.wkt +1453 -0
  26. pyogrio/gdal_data/eedaconf.json +23 -0
  27. pyogrio/gdal_data/epsg.wkt +1 -0
  28. pyogrio/gdal_data/esri_StatePlane_extra.wkt +631 -0
  29. pyogrio/gdal_data/gdalicon.png +0 -0
  30. pyogrio/gdal_data/gdalinfo_output.schema.json +346 -0
  31. pyogrio/gdal_data/gdalmdiminfo_output.schema.json +321 -0
  32. pyogrio/gdal_data/gdaltileindex.xsd +269 -0
  33. pyogrio/gdal_data/gdalvrt.xsd +880 -0
  34. pyogrio/gdal_data/gfs.xsd +246 -0
  35. pyogrio/gdal_data/gml_registry.xml +117 -0
  36. pyogrio/gdal_data/gml_registry.xsd +66 -0
  37. pyogrio/gdal_data/grib2_center.csv +251 -0
  38. pyogrio/gdal_data/grib2_process.csv +102 -0
  39. pyogrio/gdal_data/grib2_subcenter.csv +63 -0
  40. pyogrio/gdal_data/grib2_table_4_2_0_0.csv +261 -0
  41. pyogrio/gdal_data/grib2_table_4_2_0_1.csv +261 -0
  42. pyogrio/gdal_data/grib2_table_4_2_0_13.csv +261 -0
  43. pyogrio/gdal_data/grib2_table_4_2_0_14.csv +261 -0
  44. pyogrio/gdal_data/grib2_table_4_2_0_15.csv +261 -0
  45. pyogrio/gdal_data/grib2_table_4_2_0_16.csv +261 -0
  46. pyogrio/gdal_data/grib2_table_4_2_0_17.csv +11 -0
  47. pyogrio/gdal_data/grib2_table_4_2_0_18.csv +261 -0
  48. pyogrio/gdal_data/grib2_table_4_2_0_19.csv +261 -0
  49. pyogrio/gdal_data/grib2_table_4_2_0_190.csv +261 -0
  50. pyogrio/gdal_data/grib2_table_4_2_0_191.csv +261 -0
  51. pyogrio/gdal_data/grib2_table_4_2_0_2.csv +261 -0
  52. pyogrio/gdal_data/grib2_table_4_2_0_20.csv +261 -0
  53. pyogrio/gdal_data/grib2_table_4_2_0_21.csv +261 -0
  54. pyogrio/gdal_data/grib2_table_4_2_0_3.csv +261 -0
  55. pyogrio/gdal_data/grib2_table_4_2_0_4.csv +261 -0
  56. pyogrio/gdal_data/grib2_table_4_2_0_5.csv +261 -0
  57. pyogrio/gdal_data/grib2_table_4_2_0_6.csv +261 -0
  58. pyogrio/gdal_data/grib2_table_4_2_0_7.csv +261 -0
  59. pyogrio/gdal_data/grib2_table_4_2_10_0.csv +261 -0
  60. pyogrio/gdal_data/grib2_table_4_2_10_1.csv +261 -0
  61. pyogrio/gdal_data/grib2_table_4_2_10_191.csv +261 -0
  62. pyogrio/gdal_data/grib2_table_4_2_10_2.csv +261 -0
  63. pyogrio/gdal_data/grib2_table_4_2_10_3.csv +261 -0
  64. pyogrio/gdal_data/grib2_table_4_2_10_4.csv +261 -0
  65. pyogrio/gdal_data/grib2_table_4_2_1_0.csv +261 -0
  66. pyogrio/gdal_data/grib2_table_4_2_1_1.csv +261 -0
  67. pyogrio/gdal_data/grib2_table_4_2_1_2.csv +261 -0
  68. pyogrio/gdal_data/grib2_table_4_2_20_0.csv +261 -0
  69. pyogrio/gdal_data/grib2_table_4_2_20_1.csv +261 -0
  70. pyogrio/gdal_data/grib2_table_4_2_20_2.csv +261 -0
  71. pyogrio/gdal_data/grib2_table_4_2_2_0.csv +261 -0
  72. pyogrio/gdal_data/grib2_table_4_2_2_3.csv +261 -0
  73. pyogrio/gdal_data/grib2_table_4_2_2_4.csv +261 -0
  74. pyogrio/gdal_data/grib2_table_4_2_2_5.csv +261 -0
  75. pyogrio/gdal_data/grib2_table_4_2_2_6.csv +261 -0
  76. pyogrio/gdal_data/grib2_table_4_2_3_0.csv +261 -0
  77. pyogrio/gdal_data/grib2_table_4_2_3_1.csv +261 -0
  78. pyogrio/gdal_data/grib2_table_4_2_3_2.csv +28 -0
  79. pyogrio/gdal_data/grib2_table_4_2_3_3.csv +8 -0
  80. pyogrio/gdal_data/grib2_table_4_2_3_4.csv +14 -0
  81. pyogrio/gdal_data/grib2_table_4_2_3_5.csv +11 -0
  82. pyogrio/gdal_data/grib2_table_4_2_3_6.csv +11 -0
  83. pyogrio/gdal_data/grib2_table_4_2_4_0.csv +261 -0
  84. pyogrio/gdal_data/grib2_table_4_2_4_1.csv +261 -0
  85. pyogrio/gdal_data/grib2_table_4_2_4_10.csv +261 -0
  86. pyogrio/gdal_data/grib2_table_4_2_4_2.csv +261 -0
  87. pyogrio/gdal_data/grib2_table_4_2_4_3.csv +261 -0
  88. pyogrio/gdal_data/grib2_table_4_2_4_4.csv +261 -0
  89. pyogrio/gdal_data/grib2_table_4_2_4_5.csv +261 -0
  90. pyogrio/gdal_data/grib2_table_4_2_4_6.csv +261 -0
  91. pyogrio/gdal_data/grib2_table_4_2_4_7.csv +261 -0
  92. pyogrio/gdal_data/grib2_table_4_2_4_8.csv +261 -0
  93. pyogrio/gdal_data/grib2_table_4_2_4_9.csv +261 -0
  94. pyogrio/gdal_data/grib2_table_4_2_local_Canada.csv +5 -0
  95. pyogrio/gdal_data/grib2_table_4_2_local_HPC.csv +2 -0
  96. pyogrio/gdal_data/grib2_table_4_2_local_MRMS.csv +175 -0
  97. pyogrio/gdal_data/grib2_table_4_2_local_NCEP.csv +401 -0
  98. pyogrio/gdal_data/grib2_table_4_2_local_NDFD.csv +38 -0
  99. pyogrio/gdal_data/grib2_table_4_2_local_index.csv +7 -0
  100. pyogrio/gdal_data/grib2_table_4_5.csv +261 -0
  101. pyogrio/gdal_data/grib2_table_versions.csv +3 -0
  102. pyogrio/gdal_data/gt_datum.csv +229 -0
  103. pyogrio/gdal_data/gt_ellips.csv +24 -0
  104. pyogrio/gdal_data/header.dxf +1124 -0
  105. pyogrio/gdal_data/inspire_cp_BasicPropertyUnit.gfs +57 -0
  106. pyogrio/gdal_data/inspire_cp_CadastralBoundary.gfs +60 -0
  107. pyogrio/gdal_data/inspire_cp_CadastralParcel.gfs +81 -0
  108. pyogrio/gdal_data/inspire_cp_CadastralZoning.gfs +161 -0
  109. pyogrio/gdal_data/jpfgdgml_AdmArea.gfs +59 -0
  110. pyogrio/gdal_data/jpfgdgml_AdmBdry.gfs +49 -0
  111. pyogrio/gdal_data/jpfgdgml_AdmPt.gfs +59 -0
  112. pyogrio/gdal_data/jpfgdgml_BldA.gfs +54 -0
  113. pyogrio/gdal_data/jpfgdgml_BldL.gfs +54 -0
  114. pyogrio/gdal_data/jpfgdgml_Cntr.gfs +54 -0
  115. pyogrio/gdal_data/jpfgdgml_CommBdry.gfs +49 -0
  116. pyogrio/gdal_data/jpfgdgml_CommPt.gfs +59 -0
  117. pyogrio/gdal_data/jpfgdgml_Cstline.gfs +54 -0
  118. pyogrio/gdal_data/jpfgdgml_ElevPt.gfs +54 -0
  119. pyogrio/gdal_data/jpfgdgml_GCP.gfs +94 -0
  120. pyogrio/gdal_data/jpfgdgml_LeveeEdge.gfs +49 -0
  121. pyogrio/gdal_data/jpfgdgml_RailCL.gfs +54 -0
  122. pyogrio/gdal_data/jpfgdgml_RdASL.gfs +44 -0
  123. pyogrio/gdal_data/jpfgdgml_RdArea.gfs +54 -0
  124. pyogrio/gdal_data/jpfgdgml_RdCompt.gfs +59 -0
  125. pyogrio/gdal_data/jpfgdgml_RdEdg.gfs +59 -0
  126. pyogrio/gdal_data/jpfgdgml_RdMgtBdry.gfs +49 -0
  127. pyogrio/gdal_data/jpfgdgml_RdSgmtA.gfs +59 -0
  128. pyogrio/gdal_data/jpfgdgml_RvrMgtBdry.gfs +49 -0
  129. pyogrio/gdal_data/jpfgdgml_SBAPt.gfs +49 -0
  130. pyogrio/gdal_data/jpfgdgml_SBArea.gfs +54 -0
  131. pyogrio/gdal_data/jpfgdgml_SBBdry.gfs +44 -0
  132. pyogrio/gdal_data/jpfgdgml_WA.gfs +54 -0
  133. pyogrio/gdal_data/jpfgdgml_WL.gfs +54 -0
  134. pyogrio/gdal_data/jpfgdgml_WStrA.gfs +54 -0
  135. pyogrio/gdal_data/jpfgdgml_WStrL.gfs +54 -0
  136. pyogrio/gdal_data/nitf_spec.xml +3306 -0
  137. pyogrio/gdal_data/nitf_spec.xsd +189 -0
  138. pyogrio/gdal_data/ogrinfo_output.schema.json +528 -0
  139. pyogrio/gdal_data/ogrvrt.xsd +546 -0
  140. pyogrio/gdal_data/osmconf.ini +132 -0
  141. pyogrio/gdal_data/ozi_datum.csv +131 -0
  142. pyogrio/gdal_data/ozi_ellips.csv +35 -0
  143. pyogrio/gdal_data/pci_datum.txt +530 -0
  144. pyogrio/gdal_data/pci_ellips.txt +129 -0
  145. pyogrio/gdal_data/pdfcomposition.xsd +721 -0
  146. pyogrio/gdal_data/pds4_template.xml +65 -0
  147. pyogrio/gdal_data/plscenesconf.json +1985 -0
  148. pyogrio/gdal_data/ruian_vf_ob_v1.gfs +1455 -0
  149. pyogrio/gdal_data/ruian_vf_st_uvoh_v1.gfs +86 -0
  150. pyogrio/gdal_data/ruian_vf_st_v1.gfs +1489 -0
  151. pyogrio/gdal_data/ruian_vf_v1.gfs +2126 -0
  152. pyogrio/gdal_data/s57agencies.csv +249 -0
  153. pyogrio/gdal_data/s57attributes.csv +484 -0
  154. pyogrio/gdal_data/s57expectedinput.csv +1008 -0
  155. pyogrio/gdal_data/s57objectclasses.csv +287 -0
  156. pyogrio/gdal_data/seed_2d.dgn +0 -0
  157. pyogrio/gdal_data/seed_3d.dgn +0 -0
  158. pyogrio/gdal_data/stateplane.csv +259 -0
  159. pyogrio/gdal_data/tms_LINZAntarticaMapTileGrid.json +190 -0
  160. pyogrio/gdal_data/tms_MapML_APSTILE.json +268 -0
  161. pyogrio/gdal_data/tms_MapML_CBMTILE.json +346 -0
  162. pyogrio/gdal_data/tms_NZTM2000.json +243 -0
  163. pyogrio/gdal_data/trailer.dxf +434 -0
  164. pyogrio/gdal_data/usage +4 -0
  165. pyogrio/gdal_data/vcpkg-cmake-wrapper.cmake +23 -0
  166. pyogrio/gdal_data/vcpkg.spdx.json +264 -0
  167. pyogrio/gdal_data/vcpkg_abi_info.txt +41 -0
  168. pyogrio/gdal_data/vdv452.xml +367 -0
  169. pyogrio/gdal_data/vdv452.xsd +63 -0
  170. pyogrio/gdal_data/vicar.json +164 -0
  171. pyogrio/geopandas.py +683 -0
  172. pyogrio/proj_data/CH +22 -0
  173. pyogrio/proj_data/GL27 +23 -0
  174. pyogrio/proj_data/ITRF2000 +24 -0
  175. pyogrio/proj_data/ITRF2008 +94 -0
  176. pyogrio/proj_data/ITRF2014 +55 -0
  177. pyogrio/proj_data/copyright +34 -0
  178. pyogrio/proj_data/deformation_model.schema.json +582 -0
  179. pyogrio/proj_data/nad.lst +142 -0
  180. pyogrio/proj_data/nad27 +810 -0
  181. pyogrio/proj_data/nad83 +745 -0
  182. pyogrio/proj_data/other.extra +53 -0
  183. pyogrio/proj_data/proj-config-version.cmake +44 -0
  184. pyogrio/proj_data/proj-config.cmake +79 -0
  185. pyogrio/proj_data/proj-targets-release.cmake +19 -0
  186. pyogrio/proj_data/proj-targets.cmake +107 -0
  187. pyogrio/proj_data/proj.db +0 -0
  188. pyogrio/proj_data/proj.ini +51 -0
  189. pyogrio/proj_data/proj4-targets-release.cmake +19 -0
  190. pyogrio/proj_data/proj4-targets.cmake +107 -0
  191. pyogrio/proj_data/projjson.schema.json +1174 -0
  192. pyogrio/proj_data/triangulation.schema.json +214 -0
  193. pyogrio/proj_data/usage +4 -0
  194. pyogrio/proj_data/vcpkg.spdx.json +198 -0
  195. pyogrio/proj_data/vcpkg_abi_info.txt +27 -0
  196. pyogrio/proj_data/world +214 -0
  197. pyogrio/raw.py +887 -0
  198. pyogrio/tests/__init__.py +0 -0
  199. pyogrio/tests/conftest.py +398 -0
  200. pyogrio/tests/fixtures/README.md +108 -0
  201. pyogrio/tests/fixtures/curve.gpkg +0 -0
  202. pyogrio/tests/fixtures/curvepolygon.gpkg +0 -0
  203. pyogrio/tests/fixtures/line_zm.gpkg +0 -0
  204. pyogrio/tests/fixtures/multisurface.gpkg +0 -0
  205. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.cpg +1 -0
  206. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf +0 -0
  207. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.prj +1 -0
  208. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp +0 -0
  209. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx +0 -0
  210. pyogrio/tests/fixtures/sample.osm.pbf +0 -0
  211. pyogrio/tests/fixtures/test_gpkg_nulls.gpkg +0 -0
  212. pyogrio/tests/test_arrow.py +1195 -0
  213. pyogrio/tests/test_core.py +678 -0
  214. pyogrio/tests/test_geopandas_io.py +2314 -0
  215. pyogrio/tests/test_path.py +364 -0
  216. pyogrio/tests/test_raw_io.py +1515 -0
  217. pyogrio/tests/test_util.py +56 -0
  218. pyogrio/util.py +247 -0
  219. pyogrio-0.10.0.dist-info/LICENSE +21 -0
  220. pyogrio-0.10.0.dist-info/METADATA +129 -0
  221. pyogrio-0.10.0.dist-info/RECORD +223 -0
  222. pyogrio-0.10.0.dist-info/WHEEL +5 -0
  223. pyogrio-0.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1195 @@
1
+ import contextlib
2
+ import json
3
+ import math
4
+ import os
5
+ import sys
6
+ from io import BytesIO
7
+ from packaging.version import Version
8
+ from zipfile import ZipFile
9
+
10
+ import numpy as np
11
+
12
+ import pyogrio
13
+ from pyogrio import (
14
+ __gdal_version__,
15
+ get_gdal_config_option,
16
+ list_layers,
17
+ read_dataframe,
18
+ read_info,
19
+ set_gdal_config_options,
20
+ vsi_listtree,
21
+ )
22
+ from pyogrio.errors import DataLayerError, DataSourceError, FieldError
23
+ from pyogrio.raw import open_arrow, read_arrow, write, write_arrow
24
+ from pyogrio.tests.conftest import (
25
+ ALL_EXTS,
26
+ DRIVER_EXT,
27
+ DRIVERS,
28
+ requires_arrow_write_api,
29
+ requires_pyarrow_api,
30
+ )
31
+
32
+ import pytest
33
+
34
+ try:
35
+ import pandas as pd
36
+ import pyarrow
37
+
38
+ from geopandas.testing import assert_geodataframe_equal
39
+ from pandas.testing import assert_frame_equal, assert_index_equal
40
+ except ImportError:
41
+ pass
42
+
43
+ # skip all tests in this file if Arrow API or GeoPandas are unavailable
44
+ pytestmark = requires_pyarrow_api
45
+ pytest.importorskip("geopandas")
46
+ pa = pytest.importorskip("pyarrow")
47
+
48
+
49
def test_read_arrow(naturalearth_lowres_all_ext):
    """Arrow-based reads should produce the same dataframe as the default path."""
    with_arrow = read_dataframe(naturalearth_lowres_all_ext, use_arrow=True)
    without_arrow = read_dataframe(naturalearth_lowres_all_ext, use_arrow=False)

    # GeoJSON stores coordinates with limited precision, so compare loosely there.
    less_precise = naturalearth_lowres_all_ext.suffix.startswith(".geojson")
    assert_geodataframe_equal(
        with_arrow, without_arrow, check_less_precise=less_precise
    )
58
+
59
+
60
def test_read_arrow_unspecified_layer_warning(data_dir):
    """Reading a multi-layer file without specifying a layer gives a warning."""
    multilayer_source = data_dir / "sample.osm.pbf"
    with pytest.warns(UserWarning, match="More than one layer found "):
        read_arrow(multilayer_source)
64
+
65
+
66
@pytest.mark.parametrize("skip_features, expected", [(10, 167), (200, 0)])
def test_read_arrow_skip_features(naturalearth_lowres, skip_features, expected):
    """skip_features drops leading rows; skipping past the end yields no rows."""
    _, table = read_arrow(naturalearth_lowres, skip_features=skip_features)
    assert len(table) == expected
70
+
71
+
72
def test_read_arrow_negative_skip_features(naturalearth_lowres):
    """A negative skip_features value is rejected up front with a ValueError."""
    expected_msg = "'skip_features' must be >= 0"
    with pytest.raises(ValueError, match=expected_msg):
        read_arrow(naturalearth_lowres, skip_features=-1)
75
+
76
+
77
@pytest.mark.parametrize(
    "max_features, expected", [(0, 0), (10, 10), (200, 177), (100000, 177)]
)
def test_read_arrow_max_features(naturalearth_lowres, max_features, expected):
    """max_features caps the row count; it is only a limit, not a requirement."""
    _, table = read_arrow(naturalearth_lowres, max_features=max_features)
    assert len(table) == expected
83
+
84
+
85
def test_read_arrow_negative_max_features(naturalearth_lowres):
    """A negative max_features value is rejected up front with a ValueError."""
    expected_msg = "'max_features' must be >= 0"
    with pytest.raises(ValueError, match=expected_msg):
        read_arrow(naturalearth_lowres, max_features=-1)
88
+
89
+
90
@pytest.mark.parametrize(
    "skip_features, max_features, expected",
    [
        (0, 0, 0),
        (10, 0, 0),
        (200, 0, 0),
        (1, 200, 176),
        (176, 10, 1),
        (100, 100, 77),
        (100, 100000, 77),
    ],
)
def test_read_arrow_skip_features_max_features(
    naturalearth_lowres, skip_features, max_features, expected
):
    """skip_features and max_features compose: skip first, then cap the count."""
    _, table = read_arrow(
        naturalearth_lowres, skip_features=skip_features, max_features=max_features
    )
    assert len(table) == expected
109
+
110
+
111
def test_read_arrow_fid(naturalearth_lowres_all_ext):
    """fid_as_index controls whether the selected FIDs become the index."""
    common = {"use_arrow": True, "where": "fid >= 2 AND fid <= 3"}

    # Default: a fresh RangeIndex regardless of which FIDs matched.
    without_fid = read_dataframe(
        naturalearth_lowres_all_ext, fid_as_index=False, **common
    )
    assert_index_equal(without_fid.index, pd.RangeIndex(0, 2))

    # With fid_as_index the matched FIDs themselves form the index.
    with_fid = read_dataframe(naturalearth_lowres_all_ext, fid_as_index=True, **common)
    assert_index_equal(with_fid.index, pd.Index([2, 3], name="fid"))
119
+
120
+
121
def test_read_arrow_columns(naturalearth_lowres):
    """Column selection keeps only the requested columns plus the geometry."""
    df = read_dataframe(naturalearth_lowres, use_arrow=True, columns=["continent"])
    assert list(df.columns) == ["continent", "geometry"]
124
+
125
+
126
def test_read_arrow_ignore_geometry(naturalearth_lowres):
    """read_geometry=False yields a plain DataFrame without a geometry column."""
    df = read_dataframe(naturalearth_lowres, use_arrow=True, read_geometry=False)
    assert type(df) is pd.DataFrame

    # The attribute data must match a full read with geometry dropped.
    full = read_dataframe(naturalearth_lowres, use_arrow=True)
    assert_frame_equal(df, full.drop(columns=["geometry"]))
134
+
135
+
136
def test_read_arrow_nested_types(list_field_values_file):
    """List-typed fields are supported when reading through Arrow."""
    df = read_dataframe(list_field_values_file, use_arrow=True)
    assert "list_int64" in df.columns
    assert df["list_int64"][0].tolist() == [0, 1]
141
+
142
+
143
def test_read_arrow_to_pandas_kwargs(no_geometry_file):
    """Extra kwargs are forwarded to pyarrow's Table.to_pandas conversion."""
    df = read_dataframe(
        no_geometry_file,
        read_geometry=False,
        use_arrow=True,
        arrow_to_pandas_kwargs={"strings_to_categorical": True},
    )
    # strings_to_categorical should have produced a categorical dtype.
    assert df.col.dtype.name == "category"
    assert np.array_equal(df.col.values.categories, ["a", "b", "c"])
154
+
155
+
156
def test_read_arrow_raw(naturalearth_lowres):
    """read_arrow returns a (metadata dict, pyarrow.Table) pair."""
    info, data = read_arrow(naturalearth_lowres)
    assert isinstance(info, dict)
    assert isinstance(data, pyarrow.Table)
160
+
161
+
162
def test_read_arrow_vsi(naturalearth_lowres_vsi):
    """Reading from a VSI path works and leaves no pyogrio temp files behind."""
    _, table = read_arrow(naturalearth_lowres_vsi[1])
    assert len(table) == 177

    # Check temp file was cleaned up. Filter to files created by pyogrio, as GDAL
    # keeps cache files in /vsimem/.
    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
169
+
170
+
171
def test_read_arrow_bytes(geojson_bytes):
    """read_arrow accepts raw bytes and cleans up its /vsimem/ temp file."""
    info, table = read_arrow(geojson_bytes)

    assert info["fields"].shape == (5,)
    assert len(table) == 3

    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
179
+
180
+
181
def test_read_arrow_nonseekable_bytes(nonseekable_bytes):
    """Non-seekable byte streams can still be read through Arrow."""
    info, table = read_arrow(nonseekable_bytes)
    assert info["fields"].shape == (0,)
    assert len(table) == 1

    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
188
+
189
+
190
def test_read_arrow_filelike(geojson_filelike):
    """read_arrow accepts an open file-like object and cleans up after itself."""
    info, table = read_arrow(geojson_filelike)

    assert info["fields"].shape == (5,)
    assert len(table) == 3

    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
198
+
199
+
200
def test_open_arrow_pyarrow(naturalearth_lowres):
    """use_pyarrow=True yields a pyarrow RecordBatchReader that can be drained."""
    with open_arrow(naturalearth_lowres, use_pyarrow=True) as (meta, reader):
        assert isinstance(meta, dict)
        assert isinstance(reader, pyarrow.RecordBatchReader)
        table = reader.read_all()
        assert isinstance(table, pyarrow.Table)
205
+
206
+
207
def test_open_arrow_batch_size(naturalearth_lowres):
    """batch_size splits the stream into appropriately sized record batches."""
    _, full_table = read_arrow(naturalearth_lowres)
    # Choose a batch size that should split the data into exactly two batches.
    batch_size = math.ceil(len(full_table) / 2)

    with open_arrow(naturalearth_lowres, batch_size=batch_size, use_pyarrow=True) as (
        meta,
        reader,
    ):
        assert isinstance(meta, dict)
        assert isinstance(reader, pyarrow.RecordBatchReader)
        batches = list(reader)

        assert len(batches) == 2, "Should be two batches given the batch_size parameter"
        assert len(batches[0]) == batch_size, "First table should match the batch size"
225
+
226
+
227
@pytest.mark.skipif(
    __gdal_version__ >= (3, 8, 0),
    reason="skip_features supported by Arrow stream API for GDAL>=3.8.0",
)
@pytest.mark.parametrize("skip_features", [10, 200])
def test_open_arrow_skip_features_unsupported(naturalearth_lowres, skip_features):
    """skip_features are not supported for the Arrow stream interface for
    GDAL < 3.8.0"""
    expected_msg = (
        "specifying 'skip_features' is not supported for Arrow for GDAL<3.8.0"
    )
    with pytest.raises(ValueError, match=expected_msg):
        with open_arrow(naturalearth_lowres, skip_features=skip_features):
            pass
244
+
245
+
246
@pytest.mark.parametrize("max_features", [10, 200])
def test_open_arrow_max_features_unsupported(naturalearth_lowres, max_features):
    """max_features are not supported for the Arrow stream interface"""
    expected_msg = "specifying 'max_features' is not supported for Arrow"
    with pytest.raises(ValueError, match=expected_msg):
        with open_arrow(naturalearth_lowres, max_features=max_features):
            pass
258
+
259
+
260
@pytest.mark.skipif(
    __gdal_version__ < (3, 8, 0),
    reason="returns geoarrow metadata only for GDAL>=3.8.0",
)
def test_read_arrow_geoarrow_metadata(naturalearth_lowres):
    """The geometry field carries GeoArrow extension metadata including the CRS."""
    _, table = read_arrow(naturalearth_lowres)
    geom_field = table.schema.field("wkb_geometry")
    assert geom_field.metadata[b"ARROW:extension:name"] == b"geoarrow.wkb"

    # The extension metadata embeds the CRS identifier as JSON.
    crs_id = json.loads(geom_field.metadata[b"ARROW:extension:metadata"])["crs"]["id"]
    assert crs_id["authority"] == "EPSG"
    assert crs_id["code"] == 4326
271
+
272
+
273
def test_open_arrow_capsule_protocol(naturalearth_lowres):
    """The default reader implements the Arrow PyCapsule stream protocol."""
    pytest.importorskip("pyarrow", minversion="14")

    with open_arrow(naturalearth_lowres) as (meta, reader):
        assert isinstance(meta, dict)
        assert isinstance(reader, pyogrio._io._ArrowStream)

        # pyarrow.table() consumes the stream via the capsule protocol.
        result = pyarrow.table(reader)

    _, expected = read_arrow(naturalearth_lowres)
    assert result.equals(expected)
284
+
285
+
286
def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres):
    """The capsule-protocol stream works even when pyarrow cannot be imported.

    pyogrio must not require pyarrow internally; the previously imported
    pyarrow handle is still used here to consume the stream from the outside.
    """
    pyarrow = pytest.importorskip("pyarrow", minversion="14")

    # Make PyArrow temporarily unavailable (importing will fail)
    sys.modules["pyarrow"] = None
    try:
        with open_arrow(naturalearth_lowres) as (meta, reader):
            assert isinstance(meta, dict)
            assert isinstance(reader, pyogrio._io._ArrowStream)
            result = pyarrow.table(reader)
    finally:
        # Always restore the real module so later tests see pyarrow again.
        sys.modules["pyarrow"] = pyarrow

    _, expected = read_arrow(naturalearth_lowres)
    assert result.equals(expected)
301
+
302
+
303
@contextlib.contextmanager
def use_arrow_context():
    """Temporarily set ``PYOGRIO_USE_ARROW=1``, restoring the prior state on exit.

    Fixes over the naive version:
    - restoration runs in a ``finally`` block, so the environment is cleaned
      up even if the body of the ``with`` statement raises;
    - ``original is not None`` (instead of truthiness) correctly restores an
      original value that was an empty string.
    """
    original = os.environ.get("PYOGRIO_USE_ARROW", None)
    os.environ["PYOGRIO_USE_ARROW"] = "1"
    try:
        yield
    finally:
        if original is not None:
            os.environ["PYOGRIO_USE_ARROW"] = original
        else:
            del os.environ["PYOGRIO_USE_ARROW"]
312
+
313
+
314
def test_enable_with_environment_variable(list_field_values_file):
    """PYOGRIO_USE_ARROW=1 enables Arrow, adding list-field support."""
    # List fields only work with Arrow, so they are absent by default ...
    default_result = read_dataframe(list_field_values_file)
    assert "list_int64" not in default_result.columns

    # ... and present when Arrow is enabled through the environment variable.
    with use_arrow_context():
        arrow_result = read_dataframe(list_field_values_file)
    assert "list_int64" in arrow_result.columns
324
+
325
+
326
@pytest.mark.skipif(
    __gdal_version__ < (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
)
@pytest.mark.parametrize("ext", ALL_EXTS)
def test_arrow_bool_roundtrip(tmp_path, ext):
    """Boolean columns survive a write / Arrow-read round trip (GDAL >= 3.8.3).

    Fix: the file is written exactly once. The original wrote a second time
    without ``**kwargs``, overwriting the first write and discarding the
    ``spatial_index=False`` option that keeps .fgb rows in insertion order.
    """
    filename = tmp_path / f"test{ext}"

    # WKB for Point(0, 0), repeated for each of the five rows.
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 5, dtype=object
    )
    bool_col = np.array([True, False, True, False, True])
    field_data = [bool_col]
    fields = ["bool_col"]

    kwargs = {}
    if ext == ".fgb":
        # For .fgb, spatial_index=False to avoid the rows being reordered
        kwargs["spatial_index"] = False

    write(
        filename,
        geometry,
        field_data,
        fields,
        geometry_type="Point",
        crs="EPSG:4326",
        **kwargs,
    )

    table = read_arrow(filename)[1]
    assert np.array_equal(table["bool_col"].to_numpy(), bool_col)
363
+
364
+
365
@pytest.mark.skipif(
    __gdal_version__ >= (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
)
@pytest.mark.parametrize("ext", ALL_EXTS)
def test_arrow_bool_exception(tmp_path, ext):
    """Older GDAL misreads booleans via Arrow for GPKG/FGB, so pyogrio raises."""
    filename = tmp_path / f"test{ext}"

    # WKB for Point(0, 0), repeated for each of the five rows.
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 5, dtype=object
    )
    bool_col = np.array([True, False, True, False, True])

    write(
        filename,
        geometry,
        [bool_col],
        ["bool_col"],
        geometry_type="Point",
        crs="EPSG:4326",
    )

    if ext not in {".fgb", ".gpkg"}:
        # Other drivers are unaffected: opening must succeed as-is.
        with open_arrow(filename):
            pass
        return

    # only raise exception for GPKG / FGB
    with pytest.raises(
        RuntimeError,
        match="GDAL < 3.8.3 does not correctly read boolean data values using "
        "the Arrow API",
    ):
        with open_arrow(filename):
            pass

    # do not raise exception if no bool columns are read
    with open_arrow(filename, columns=[]):
        pass
401
+
402
+
403
# WKB encoding of Point(0, 0), repeated for three features; shared fixture data
# for the write tests below.
_POINT_WKB = "010100000000000000000000000000000000000000"
points = np.array([bytes.fromhex(_POINT_WKB)] * 3, dtype=object)
408
+
409
+
410
@requires_arrow_write_api
def test_write_shp(tmp_path, naturalearth_lowres):
    """Writing an Arrow table to shapefile produces the sidecar files too."""
    meta, table = read_arrow(naturalearth_lowres)

    filename = tmp_path / "test.shp"
    write_arrow(
        table,
        filename,
        crs=meta["crs"],
        encoding=meta["encoding"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )

    # The .shp itself plus its attribute (.dbf) and projection (.prj) sidecars.
    for path in (filename, filename.with_suffix(".dbf"), filename.with_suffix(".prj")):
        assert path.exists()
427
+
428
+
429
@pytest.mark.filterwarnings("ignore:A geometry of type POLYGON is inserted")
@requires_arrow_write_api
def test_write_gpkg(tmp_path, naturalearth_lowres):
    """An Arrow table can be written out as a GeoPackage."""
    meta, table = read_arrow(naturalearth_lowres)

    outfile = tmp_path / "test.gpkg"
    write_arrow(
        table,
        outfile,
        driver="GPKG",
        crs=meta["crs"],
        geometry_type="MultiPolygon",
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )

    assert outfile.exists()
445
+
446
+
447
@pytest.mark.filterwarnings("ignore:A geometry of type POLYGON is inserted")
@requires_arrow_write_api
def test_write_gpkg_multiple_layers(tmp_path, naturalearth_lowres):
    """Successive writes with distinct layer names accumulate layers in a GPKG."""
    meta, table = read_arrow(naturalearth_lowres)
    meta["geometry_type"] = "MultiPolygon"

    filename = tmp_path / "test.gpkg"

    def write_layer(layer_name):
        # One layer per call; a later call must not clobber earlier layers.
        write_arrow(
            table,
            filename,
            driver="GPKG",
            layer=layer_name,
            crs=meta["crs"],
            geometry_type="MultiPolygon",
            geometry_name=meta["geometry_name"] or "wkb_geometry",
        )

    write_layer("first")
    assert filename.exists()
    assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])

    write_layer("second")
    assert np.array_equal(
        list_layers(filename), [["first", "MultiPolygon"], ["second", "MultiPolygon"]]
    )
481
+
482
+
483
@requires_arrow_write_api
def test_write_geojson(tmp_path, naturalearth_lowres):
    """Writing an Arrow table to GeoJSON produces a valid FeatureCollection.

    Fix: the original parsed the output with ``json.loads(open(filename).read())``,
    leaking an open file handle; ``Path.read_text()`` closes it deterministically.
    """
    meta, table = read_arrow(naturalearth_lowres)
    filename = tmp_path / "test.json"
    write_arrow(
        table,
        filename,
        driver="GeoJSON",
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )

    assert filename.exists()

    data = json.loads(filename.read_text())

    assert data["type"] == "FeatureCollection"
    assert data["name"] == "test"
    assert "crs" in data
    assert len(data["features"]) == len(table)
    # every source field must appear in the first feature's properties
    assert not len(
        set(meta["fields"]).difference(data["features"][0]["properties"].keys())
    )
507
+
508
+
509
@requires_arrow_write_api
@pytest.mark.skipif(
    __gdal_version__ < (3, 6, 0),
    reason="OpenFileGDB write support only available for GDAL >= 3.6.0",
)
@pytest.mark.parametrize(
    "write_int64",
    [
        False,
        pytest.param(
            True,
            marks=pytest.mark.skipif(
                __gdal_version__ < (3, 9, 0),
                reason="OpenFileGDB write support for int64 values for GDAL >= 3.9.0",
            ),
        ),
    ],
)
def test_write_openfilegdb(tmp_path, write_int64):
    """Round-trip several field dtypes through the OpenFileGDB driver.

    Checks the dtype conversions the driver applies on write (bool -> int32,
    and int64 -> float64 unless TARGET_ARCGIS_VERSION opts in to int64).
    """
    # One column per dtype we want to round-trip.
    expected_field_data = [
        np.array([True, False, True], dtype="bool"),
        np.array([1, 2, 3], dtype="int16"),
        np.array([1, 2, 3], dtype="int32"),
        np.array([1, 2, 3], dtype="int64"),
        np.array([1, 2, 3], dtype="float32"),
        np.array([1, 2, 3], dtype="float64"),
    ]

    # Column names are the dtype names ("bool", "int16", ...).
    table = pa.table(
        {
            "geometry": points,
            **{field.dtype.name: field for field in expected_field_data},
        }
    )

    filename = tmp_path / "test.gdb"

    expected_meta = {"crs": "EPSG:4326"}

    # int64 is not supported without additional config: https://gdal.org/en/latest/drivers/vector/openfilegdb.html#bit-integer-field-support
    # it is converted to float64 by default and raises a warning
    # (for GDAL >= 3.9.0 only)
    write_params = (
        {"TARGET_ARCGIS_VERSION": "ARCGIS_PRO_3_2_OR_LATER"} if write_int64 else {}
    )

    # Expect the int64-downcast warning only when GDAL >= 3.9.0 writes
    # int64 as float64 (i.e. when we did not opt in to real int64 support).
    if write_int64 or __gdal_version__ < (3, 9, 0):
        ctx = contextlib.nullcontext()
    else:
        ctx = pytest.warns(
            RuntimeWarning, match="Integer64 will be written as a Float64"
        )

    with ctx:
        write_arrow(
            table,
            filename,
            driver="OpenFileGDB",
            geometry_type="Point",
            geometry_name="geometry",
            **expected_meta,
            **write_params,
        )

    meta, table = read_arrow(filename)

    # Without the ArcGIS Pro target version, int64 comes back as float64.
    if not write_int64:
        expected_field_data[3] = expected_field_data[3].astype("float64")

    # bool types are converted to int32
    expected_field_data[0] = expected_field_data[0].astype("int32")

    assert meta["crs"] == expected_meta["crs"]

    # NOTE: geometry name is set to "SHAPE" by GDAL
    assert np.array_equal(table[meta["geometry_name"]], points)
    for i in range(len(expected_field_data)):
        values = table[table.schema.names[i]].to_numpy()
        assert values.dtype == expected_field_data[i].dtype
        assert np.array_equal(values, expected_field_data[i])
589
+
590
+
591
@pytest.mark.parametrize(
    "driver",
    {
        driver
        for driver in DRIVERS.values()
        if driver not in ("ESRI Shapefile", "GPKG", "GeoJSON")
    },
)
@requires_arrow_write_api
def test_write_supported(tmp_path, naturalearth_lowres, driver):
    """Test drivers known to work that are not specifically tested above."""
    # naturalearth_lowres mixes polygons and multipolygons, which not every
    # driver accepts in mixed form; reading only the first feature avoids that.
    meta, table = read_arrow(naturalearth_lowres, columns=["iso_a3"], max_features=1)
    meta["geometry_type"] = "MultiPolygon"

    outfile = tmp_path / f"test{DRIVER_EXT[driver]}"
    write_arrow(
        table,
        outfile,
        driver=driver,
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )
    assert outfile.exists()
619
+
620
+
621
@requires_arrow_write_api
def test_write_unsupported(tmp_path, naturalearth_lowres):
    """Writing with a read-only driver (ESRIJSON) must raise DataSourceError."""
    meta, table = read_arrow(naturalearth_lowres)

    write_kwargs = {
        "driver": "ESRIJSON",
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }
    with pytest.raises(DataSourceError, match="does not support write functionality"):
        write_arrow(table, tmp_path / "test.json", **write_kwargs)
634
+
635
+
636
@pytest.mark.parametrize("ext", DRIVERS)
@requires_arrow_write_api
def test_write_append(request, tmp_path, naturalearth_lowres, ext):
    """Appending the same records again doubles the feature count."""
    if ext.startswith(".geojson"):
        # Bug in GDAL when appending int64 to GeoJSON
        # (https://github.com/OSGeo/gdal/issues/9792)
        request.node.add_marker(
            pytest.mark.xfail(reason="Bugs with append when writing Arrow to GeoJSON")
        )

    meta, table = read_arrow(naturalearth_lowres)

    # coerce output layer to generic Geometry to avoid mixed type errors
    meta["geometry_type"] = "Unknown"

    # the same keyword set is used for both the initial write and the append
    common_kwargs = {
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }

    filename = tmp_path / f"test{ext}"
    write_arrow(table, filename, **common_kwargs)
    assert filename.exists()
    assert read_info(filename)["features"] == 177

    # write the same records again
    write_arrow(table, filename, append=True, **common_kwargs)
    assert read_info(filename)["features"] == 354
672
+
673
+
674
@pytest.mark.parametrize("driver,ext", [("GML", ".gml"), ("GeoJSONSeq", ".geojsons")])
@requires_arrow_write_api
def test_write_append_unsupported(tmp_path, naturalearth_lowres, driver, ext):
    """Appending must raise for drivers that do not support append.

    Bug fix: the body previously hard-coded the GML driver and ".gml"
    extension, so the ``driver``/``ext`` parameters were unused and the
    GeoJSONSeq case silently re-tested GML.
    """
    meta, table = read_arrow(naturalearth_lowres)

    filename = tmp_path / f"test{ext}"
    write_arrow(
        table,
        filename,
        driver=driver,
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )
    assert filename.exists()
    assert read_info(filename, force_feature_count=True)["features"] == 177

    # appending to a driver without append support must fail
    with pytest.raises(DataSourceError):
        write_arrow(
            table,
            filename,
            driver=driver,
            append=True,
            crs=meta["crs"],
            geometry_type=meta["geometry_type"],
            geometry_name=meta["geometry_name"] or "wkb_geometry",
        )
702
+
703
+
704
@requires_arrow_write_api
def test_write_gdalclose_error(naturalearth_lowres):
    """An error raised by GDAL while flushing/closing the dataset (here: a
    non-existing S3 bucket) must surface as DataSourceError.

    Fix: the dummy AWS config options were previously left set after the test,
    leaking global GDAL state into subsequent tests; they are now cleared in a
    ``finally`` block (consistent with the SHAPE_ENCODING handling elsewhere
    in this file).
    """
    meta, table = read_arrow(naturalearth_lowres)

    filename = "s3://non-existing-bucket/test.geojson"

    try:
        # set config options to avoid errors on open due to GDAL S3 configuration
        set_gdal_config_options(
            {
                "AWS_ACCESS_KEY_ID": "invalid",
                "AWS_SECRET_ACCESS_KEY": "invalid",
                "AWS_NO_SIGN_REQUEST": True,
            }
        )

        with pytest.raises(
            DataSourceError, match="Failed to write features to dataset"
        ):
            write_arrow(
                table,
                filename,
                crs=meta["crs"],
                geometry_type=meta["geometry_type"],
                geometry_name=meta["geometry_name"] or "wkb_geometry",
            )
    finally:
        # clear the dummy credentials so other tests are unaffected
        set_gdal_config_options(
            {
                "AWS_ACCESS_KEY_ID": None,
                "AWS_SECRET_ACCESS_KEY": None,
                "AWS_NO_SIGN_REQUEST": None,
            }
        )
727
+
728
+
729
@requires_arrow_write_api
@pytest.mark.parametrize("name", ["geoarrow.wkb", "ogc.wkb"])
def test_write_geometry_extension_type(tmp_path, naturalearth_lowres, name):
    """The geometry column is inferred from the Arrow extension-type name
    rather than being passed explicitly via `geometry_name`."""
    meta, table = read_arrow(naturalearth_lowres)

    # retag the geometry field with the requested extension name
    pos = table.schema.get_field_index("wkb_geometry")
    tagged_field = table.schema.field(pos).with_metadata(
        {"ARROW:extension:name": name}
    )
    retagged = table.cast(table.schema.set(pos, tagged_field))

    out = tmp_path / "test_geoarrow.shp"
    write_arrow(
        retagged,
        out,
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
    )
    _, roundtripped = read_arrow(out)
    assert roundtripped.equals(table)
750
+
751
+
752
@requires_arrow_write_api
def test_write_unsupported_geoarrow(tmp_path, naturalearth_lowres):
    """Non-WKB geoarrow extension names are rejected when writing."""
    meta, table = read_arrow(naturalearth_lowres)

    # change extension type name (the name doesn't match with the column type
    # for correct geoarrow data, but our writing code checks it based on the name)
    pos = table.schema.get_field_index("wkb_geometry")
    tagged_field = table.schema.field(pos).with_metadata(
        {"ARROW:extension:name": "geoarrow.point"}
    )
    retagged = table.cast(table.schema.set(pos, tagged_field))

    expected = pytest.raises(
        NotImplementedError,
        match="Writing a geometry column of type geoarrow.point is not yet supported",
    )
    with expected:
        write_arrow(
            retagged,
            tmp_path / "test_geoarrow.shp",
            crs=meta["crs"],
            geometry_type=meta["geometry_type"],
        )
774
+
775
+
776
@requires_arrow_write_api
def test_write_no_geom(tmp_path, naturalearth_lowres):
    """A table without a geometry column round-trips through GPKG."""
    _, table = read_arrow(naturalearth_lowres)
    table = table.drop_columns("wkb_geometry")

    # Test
    out = tmp_path / "test.gpkg"
    write_arrow(table, out)

    # Check result
    assert out.exists()
    meta, roundtripped = read_arrow(out)
    assert meta["crs"] is None
    assert meta["geometry_type"] is None
    assert table.equals(roundtripped)
790
+
791
+
792
@requires_arrow_write_api
def test_write_geometry_type(tmp_path, naturalearth_lowres):
    """geometry_type is required; "Unknown" creates a generic layer."""
    meta, table = read_arrow(naturalearth_lowres)

    # Not specifying the geometry currently raises an error
    with pytest.raises(ValueError, match="'geometry_type' keyword is required"):
        write_arrow(
            table,
            tmp_path / "test.shp",
            crs=meta["crs"],
            geometry_name=meta["geometry_name"] or "wkb_geometry",
        )

    # Specifying "Unknown" works and will create generic layer
    out = tmp_path / "test.gpkg"
    write_arrow(
        table,
        out,
        crs=meta["crs"],
        geometry_type="Unknown",
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )
    assert out.exists()
    written_meta, _ = read_arrow(out)
    assert written_meta["geometry_type"] == "Unknown"
817
+
818
+
819
@requires_arrow_write_api
def test_write_raise_promote_to_multi(tmp_path, naturalearth_lowres):
    """The promote_to_multi keyword is rejected by the Arrow write path."""
    meta, table = read_arrow(naturalearth_lowres)

    expected = pytest.raises(
        ValueError, match="The 'promote_to_multi' option is not supported"
    )
    with expected:
        write_arrow(
            table,
            tmp_path / "test.shp",
            crs=meta["crs"],
            geometry_type=meta["geometry_type"],
            geometry_name=meta["geometry_name"] or "wkb_geometry",
            promote_to_multi=True,
        )
834
+
835
+
836
@requires_arrow_write_api
def test_write_no_crs(tmp_path, naturalearth_lowres):
    """Omitting the CRS warns, but the data is still written correctly."""
    meta, table = read_arrow(naturalearth_lowres)

    out = tmp_path / "test.shp"
    with pytest.warns(UserWarning, match="'crs' was not provided"):
        write_arrow(
            table,
            out,
            geometry_type=meta["geometry_type"],
            geometry_name=meta["geometry_name"] or "wkb_geometry",
        )

    # apart from CRS warning, it did write correctly
    result_meta, result_table = read_arrow(out)
    assert table.equals(result_table)
    assert result_meta["crs"] is None
852
+
853
+
854
@requires_arrow_write_api
def test_write_non_arrow_data(tmp_path):
    """Objects that are not Arrow data (plain ndarray) are rejected."""
    not_arrow = np.array([1, 2, 3])
    expected = pytest.raises(
        ValueError, match="The provided data is not recognized as Arrow data"
    )
    with expected:
        write_arrow(
            not_arrow,
            tmp_path / "test_no_arrow_data.shp",
            crs="EPSG:4326",
            geometry_type="Point",
            geometry_name="geometry",
        )
867
+
868
+
869
@pytest.mark.skipif(
    Version(pa.__version__) < Version("16.0.0.dev0"),
    reason="PyCapsule protocol only added to pyarrow.ChunkedArray in pyarrow 16",
)
@requires_arrow_write_api
def test_write_non_arrow_tabular_data(tmp_path):
    """Arrow data that is not tabular (a ChunkedArray) raises DataLayerError."""
    chunked = pa.chunked_array([[1, 2, 3], [4, 5, 6]])
    expected = pytest.raises(
        DataLayerError,
        match=".*should be called on a schema that is a struct of fields",
    )
    with expected:
        write_arrow(
            chunked,
            tmp_path / "test_no_arrow_tabular_data.shp",
            crs="EPSG:4326",
            geometry_type="Point",
            geometry_name="geometry",
        )
887
+
888
+
889
@pytest.mark.filterwarnings("ignore:.*not handled natively:RuntimeWarning")
@requires_arrow_write_api
def test_write_batch_error_message(tmp_path):
    """Errors raised by GDAL while writing record batches must be surfaced
    as DataLayerError with GDAL's own message."""
    # raise the correct error and message from GDAL when an error happens
    # while writing

    # invalid dictionary array that will only error while writing (schema
    # itself is OK)
    # NOTE: the indices buffer is taken from an int64 array whose values
    # (0, 1, 2) exceed the 2-element dictionary ["a", "b"], so index 2 is
    # out of range — detectable only when the data is actually consumed.
    arr = pa.DictionaryArray.from_buffers(
        pa.dictionary(pa.int64(), pa.string()),
        length=3,
        buffers=pa.array([0, 1, 2]).buffers(),
        dictionary=pa.array(["a", "b"]),
    )
    # `points` is presumably a module-level list of WKB point geometries
    # defined earlier in this file — confirm against the fixtures above
    table = pa.table({"geometry": points, "col": arr})

    with pytest.raises(DataLayerError, match=".*invalid dictionary index"):
        write_arrow(
            table,
            tmp_path / "test_unsupported_list_type.fgb",
            crs="EPSG:4326",
            geometry_type="Point",
            geometry_name="geometry",
        )
913
+
914
+
915
@requires_arrow_write_api
def test_write_schema_error_message(tmp_path):
    """GDAL errors raised while creating fields from the schema must surface
    as FieldError with GDAL's message."""
    # complex list of map of integer->integer is not supported by GDAL
    unsupported_type = pa.list_(pa.map_(pa.int64(), pa.int64()))
    unsupported_col = pa.array(
        [[[(1, 2), (3, 4)], None, [(5, 6)]]] * 3,
        unsupported_type,
    )
    table = pa.table({"geometry": points, "col": unsupported_col})

    with pytest.raises(FieldError, match=".*not supported"):
        write_arrow(
            table,
            tmp_path / "test_unsupported_map_type.shp",
            crs="EPSG:4326",
            geometry_type="Point",
            geometry_name="geometry",
        )
938
+
939
+
940
@requires_arrow_write_api
@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory(naturalearth_lowres, driver):
    """Writing to an in-memory BytesIO buffer works for /vsimem-capable drivers."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)
    meta["geometry_type"] = "MultiPolygon"

    out = BytesIO()
    write_arrow(
        table,
        out,
        driver=driver,
        layer="test",
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )

    assert len(out.getbuffer()) > 0
    assert list_layers(out)[0][0] == "test"

    result_meta, result_table = read_arrow(out)
    assert len(result_table) == len(table)
    assert np.array_equal(result_meta["fields"], meta["fields"])
964
+
965
+
966
@requires_arrow_write_api
def test_write_memory_driver_required(naturalearth_lowres):
    """Writing to an in-memory buffer without an explicit driver must fail."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)

    out = BytesIO()
    expected = pytest.raises(
        ValueError,
        match="driver must be provided to write to in-memory file",
    )
    with expected:
        write_arrow(
            table,
            out,
            driver=None,
            layer="test",
            crs=meta["crs"],
            geometry_type=meta["geometry_type"],
            geometry_name=meta["geometry_name"] or "wkb_geometry",
        )

    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
987
+
988
+
989
@requires_arrow_write_api
@pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
    """Multi-file drivers cannot be written to an in-memory buffer."""
    if driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
        pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")

    meta, table = read_arrow(naturalearth_lowres, max_features=1)

    out = BytesIO()

    expected = pytest.raises(
        ValueError, match=f"writing to in-memory file is not supported for {driver}"
    )
    with expected:
        write_arrow(
            table,
            out,
            driver=driver,
            layer="test",
            crs=meta["crs"],
            geometry_type=meta["geometry_type"],
            geometry_name=meta["geometry_name"] or "wkb_geometry",
        )
1011
+
1012
+
1013
@requires_arrow_write_api
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory_append_unsupported(naturalearth_lowres, driver):
    """Append mode is not available when writing to an in-memory buffer."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)
    meta["geometry_type"] = "MultiPolygon"

    out = BytesIO()
    expected = pytest.raises(
        NotImplementedError, match="append is not supported for in-memory files"
    )
    with expected:
        write_arrow(
            table,
            out,
            driver=driver,
            layer="test",
            crs=meta["crs"],
            geometry_type=meta["geometry_type"],
            geometry_name=meta["geometry_name"] or "wkb_geometry",
            append=True,
        )
1033
+
1034
+
1035
@requires_arrow_write_api
def test_write_memory_existing_unsupported(naturalearth_lowres):
    """A BytesIO that already contains data cannot be written to."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)
    meta["geometry_type"] = "MultiPolygon"

    non_empty_buffer = BytesIO(b"0000")
    expected = pytest.raises(
        NotImplementedError,
        match="writing to existing in-memory object is not supported",
    )
    with expected:
        write_arrow(
            table,
            non_empty_buffer,
            driver="GeoJSON",
            layer="test",
            crs=meta["crs"],
            geometry_type=meta["geometry_type"],
            geometry_name=meta["geometry_name"] or "wkb_geometry",
        )
1054
+
1055
+
1056
@requires_arrow_write_api
def test_write_open_file_handle(tmp_path, naturalearth_lowres):
    """Verify that writing to an open file handle is not currently supported"""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)
    meta["geometry_type"] = "MultiPolygon"

    # identical keyword arguments for both attempts below
    write_kwargs = {
        "driver": "GeoJSON",
        "layer": "test",
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }

    # verify it fails for regular file handle
    with pytest.raises(
        NotImplementedError, match="writing to an open file handle is not yet supported"
    ):
        with open(tmp_path / "test.geojson", "wb") as f:
            write_arrow(table, f, **write_kwargs)

    # verify it fails for ZipFile
    with pytest.raises(
        NotImplementedError, match="writing to an open file handle is not yet supported"
    ):
        with ZipFile(tmp_path / "test.geojson.zip", "w") as z:
            with z.open("test.geojson", "w") as f:
                write_arrow(table, f, **write_kwargs)

    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
1096
+
1097
+
1098
@requires_arrow_write_api
def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text):
    """Round-trip a non-UTF-8 field name/value through a shapefile.

    Covers three read paths: automatic decoding via the .cpg sidecar file,
    mis-decoding when the .cpg file is removed, and correct decoding when the
    user passes ``encoding`` explicitly.
    """
    # `encoded_text` fixture yields (encoding name, sample text in that encoding)
    encoding, text = encoded_text

    table = pa.table(
        {
            # Point(0, 0)
            "geometry": pa.array(
                [bytes.fromhex("010100000000000000000000000000000000000000")]
            ),
            # the non-ASCII text is used both as field name and field value
            text: pa.array([text]),
        }
    )

    filename = tmp_path / "test.shp"
    write_arrow(
        table,
        filename,
        geometry_type="Point",
        geometry_name="geometry",
        crs="EPSG:4326",
        encoding=encoding,
    )

    # NOTE: GDAL automatically creates a cpg file with the encoding name, which
    # means that if we read this without specifying the encoding it uses the
    # correct one
    schema, table = read_arrow(filename)
    assert schema["fields"][0] == text
    assert table[text][0].as_py() == text

    # verify that if cpg file is not present, that user-provided encoding must be used
    filename.with_suffix(".cpg").unlink()

    # We will assume ISO-8859-1, which is wrong
    miscoded = text.encode(encoding).decode("ISO-8859-1")
    bad_schema = read_arrow(filename)[0]
    assert bad_schema["fields"][0] == miscoded
    # table cannot be decoded to UTF-8 without UnicodeDecodeErrors

    # If encoding is provided, that should yield correct text
    schema, table = read_arrow(filename, encoding=encoding)
    assert schema["fields"][0] == text
    assert table[text][0].as_py() == text

    # verify that setting encoding does not corrupt SHAPE_ENCODING option if set
    # globally (it is ignored during read when encoding is specified by user)
    try:
        set_gdal_config_options({"SHAPE_ENCODING": "CP1254"})
        _ = read_arrow(filename, encoding=encoding)
        assert get_gdal_config_option("SHAPE_ENCODING") == "CP1254"

    finally:
        # reset to clear between tests
        set_gdal_config_options({"SHAPE_ENCODING": None})
1153
+
1154
+
1155
@requires_arrow_write_api
def test_encoding_write_layer_option_collision_shapefile(tmp_path, naturalearth_lowres):
    """Providing both encoding parameter and ENCODING layer creation option
    (even if blank) is not allowed."""

    meta, table = read_arrow(naturalearth_lowres)

    expected_msg = (
        'cannot provide both encoding parameter and "ENCODING" layer creation '
        "option"
    )
    with pytest.raises(ValueError, match=expected_msg):
        write_arrow(
            table,
            tmp_path / "test.shp",
            crs=meta["crs"],
            geometry_type="MultiPolygon",
            geometry_name=meta["geometry_name"] or "wkb_geometry",
            encoding="CP936",
            layer_options={"ENCODING": ""},
        )
1178
+
1179
+
1180
@requires_arrow_write_api
@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
def test_non_utf8_encoding_io_arrow_exception(tmp_path, naturalearth_lowres, ext):
    """A non-UTF-8 encoding is rejected for Arrow-based writes."""
    meta, table = read_arrow(naturalearth_lowres)

    expected = pytest.raises(
        ValueError, match="non-UTF-8 encoding is not supported for Arrow"
    )
    with expected:
        write_arrow(
            table,
            tmp_path / f"test.{ext}",
            crs=meta["crs"],
            geometry_type="MultiPolygon",
            geometry_name=meta["geometry_name"] or "wkb_geometry",
            encoding="CP936",
        )
+ )