pyogrio 0.9.0__cp38-cp38-macosx_12_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyogrio might be problematic. Click here for more details.

Files changed (239) hide show
  1. pyogrio/.dylibs/libgdal.34.3.8.5.dylib +0 -0
  2. pyogrio/__init__.py +48 -0
  3. pyogrio/_compat.py +41 -0
  4. pyogrio/_env.py +61 -0
  5. pyogrio/_err.cpython-38-darwin.so +0 -0
  6. pyogrio/_err.pxd +4 -0
  7. pyogrio/_err.pyx +250 -0
  8. pyogrio/_geometry.cpython-38-darwin.so +0 -0
  9. pyogrio/_geometry.pxd +4 -0
  10. pyogrio/_geometry.pyx +129 -0
  11. pyogrio/_io.cpython-38-darwin.so +0 -0
  12. pyogrio/_io.pxd +0 -0
  13. pyogrio/_io.pyx +2742 -0
  14. pyogrio/_ogr.cpython-38-darwin.so +0 -0
  15. pyogrio/_ogr.pxd +444 -0
  16. pyogrio/_ogr.pyx +346 -0
  17. pyogrio/_version.py +21 -0
  18. pyogrio/_vsi.cpython-38-darwin.so +0 -0
  19. pyogrio/_vsi.pxd +4 -0
  20. pyogrio/_vsi.pyx +140 -0
  21. pyogrio/arrow_bridge.h +115 -0
  22. pyogrio/core.py +320 -0
  23. pyogrio/errors.py +32 -0
  24. pyogrio/gdal_data/GDAL-targets-release.cmake +19 -0
  25. pyogrio/gdal_data/GDAL-targets.cmake +105 -0
  26. pyogrio/gdal_data/GDALConfig.cmake +25 -0
  27. pyogrio/gdal_data/GDALConfigVersion.cmake +85 -0
  28. pyogrio/gdal_data/GDALLogoBW.svg +138 -0
  29. pyogrio/gdal_data/GDALLogoColor.svg +126 -0
  30. pyogrio/gdal_data/GDALLogoGS.svg +126 -0
  31. pyogrio/gdal_data/LICENSE.TXT +467 -0
  32. pyogrio/gdal_data/bag_template.xml +201 -0
  33. pyogrio/gdal_data/copyright +467 -0
  34. pyogrio/gdal_data/cubewerx_extra.wkt +48 -0
  35. pyogrio/gdal_data/default.rsc +0 -0
  36. pyogrio/gdal_data/ecw_cs.wkt +1453 -0
  37. pyogrio/gdal_data/eedaconf.json +23 -0
  38. pyogrio/gdal_data/epsg.wkt +1 -0
  39. pyogrio/gdal_data/esri_StatePlane_extra.wkt +631 -0
  40. pyogrio/gdal_data/gdalicon.png +0 -0
  41. pyogrio/gdal_data/gdalinfo_output.schema.json +346 -0
  42. pyogrio/gdal_data/gdalmdiminfo_output.schema.json +321 -0
  43. pyogrio/gdal_data/gdalvrt.xsd +772 -0
  44. pyogrio/gdal_data/gfs.xsd +246 -0
  45. pyogrio/gdal_data/gml_registry.xml +117 -0
  46. pyogrio/gdal_data/gml_registry.xsd +66 -0
  47. pyogrio/gdal_data/gmlasconf.xml +169 -0
  48. pyogrio/gdal_data/gmlasconf.xsd +1066 -0
  49. pyogrio/gdal_data/grib2_center.csv +251 -0
  50. pyogrio/gdal_data/grib2_process.csv +102 -0
  51. pyogrio/gdal_data/grib2_subcenter.csv +63 -0
  52. pyogrio/gdal_data/grib2_table_4_2_0_0.csv +261 -0
  53. pyogrio/gdal_data/grib2_table_4_2_0_1.csv +261 -0
  54. pyogrio/gdal_data/grib2_table_4_2_0_13.csv +261 -0
  55. pyogrio/gdal_data/grib2_table_4_2_0_14.csv +261 -0
  56. pyogrio/gdal_data/grib2_table_4_2_0_15.csv +261 -0
  57. pyogrio/gdal_data/grib2_table_4_2_0_16.csv +261 -0
  58. pyogrio/gdal_data/grib2_table_4_2_0_17.csv +11 -0
  59. pyogrio/gdal_data/grib2_table_4_2_0_18.csv +261 -0
  60. pyogrio/gdal_data/grib2_table_4_2_0_19.csv +261 -0
  61. pyogrio/gdal_data/grib2_table_4_2_0_190.csv +261 -0
  62. pyogrio/gdal_data/grib2_table_4_2_0_191.csv +261 -0
  63. pyogrio/gdal_data/grib2_table_4_2_0_2.csv +261 -0
  64. pyogrio/gdal_data/grib2_table_4_2_0_20.csv +261 -0
  65. pyogrio/gdal_data/grib2_table_4_2_0_21.csv +261 -0
  66. pyogrio/gdal_data/grib2_table_4_2_0_3.csv +261 -0
  67. pyogrio/gdal_data/grib2_table_4_2_0_4.csv +261 -0
  68. pyogrio/gdal_data/grib2_table_4_2_0_5.csv +261 -0
  69. pyogrio/gdal_data/grib2_table_4_2_0_6.csv +261 -0
  70. pyogrio/gdal_data/grib2_table_4_2_0_7.csv +261 -0
  71. pyogrio/gdal_data/grib2_table_4_2_10_0.csv +261 -0
  72. pyogrio/gdal_data/grib2_table_4_2_10_1.csv +261 -0
  73. pyogrio/gdal_data/grib2_table_4_2_10_191.csv +261 -0
  74. pyogrio/gdal_data/grib2_table_4_2_10_2.csv +261 -0
  75. pyogrio/gdal_data/grib2_table_4_2_10_3.csv +261 -0
  76. pyogrio/gdal_data/grib2_table_4_2_10_4.csv +261 -0
  77. pyogrio/gdal_data/grib2_table_4_2_1_0.csv +261 -0
  78. pyogrio/gdal_data/grib2_table_4_2_1_1.csv +261 -0
  79. pyogrio/gdal_data/grib2_table_4_2_1_2.csv +261 -0
  80. pyogrio/gdal_data/grib2_table_4_2_20_0.csv +261 -0
  81. pyogrio/gdal_data/grib2_table_4_2_20_1.csv +261 -0
  82. pyogrio/gdal_data/grib2_table_4_2_20_2.csv +261 -0
  83. pyogrio/gdal_data/grib2_table_4_2_2_0.csv +261 -0
  84. pyogrio/gdal_data/grib2_table_4_2_2_3.csv +261 -0
  85. pyogrio/gdal_data/grib2_table_4_2_2_4.csv +261 -0
  86. pyogrio/gdal_data/grib2_table_4_2_2_5.csv +261 -0
  87. pyogrio/gdal_data/grib2_table_4_2_2_6.csv +261 -0
  88. pyogrio/gdal_data/grib2_table_4_2_3_0.csv +261 -0
  89. pyogrio/gdal_data/grib2_table_4_2_3_1.csv +261 -0
  90. pyogrio/gdal_data/grib2_table_4_2_3_2.csv +28 -0
  91. pyogrio/gdal_data/grib2_table_4_2_3_3.csv +8 -0
  92. pyogrio/gdal_data/grib2_table_4_2_3_4.csv +14 -0
  93. pyogrio/gdal_data/grib2_table_4_2_3_5.csv +11 -0
  94. pyogrio/gdal_data/grib2_table_4_2_3_6.csv +11 -0
  95. pyogrio/gdal_data/grib2_table_4_2_4_0.csv +261 -0
  96. pyogrio/gdal_data/grib2_table_4_2_4_1.csv +261 -0
  97. pyogrio/gdal_data/grib2_table_4_2_4_10.csv +261 -0
  98. pyogrio/gdal_data/grib2_table_4_2_4_2.csv +261 -0
  99. pyogrio/gdal_data/grib2_table_4_2_4_3.csv +261 -0
  100. pyogrio/gdal_data/grib2_table_4_2_4_4.csv +261 -0
  101. pyogrio/gdal_data/grib2_table_4_2_4_5.csv +261 -0
  102. pyogrio/gdal_data/grib2_table_4_2_4_6.csv +261 -0
  103. pyogrio/gdal_data/grib2_table_4_2_4_7.csv +261 -0
  104. pyogrio/gdal_data/grib2_table_4_2_4_8.csv +261 -0
  105. pyogrio/gdal_data/grib2_table_4_2_4_9.csv +261 -0
  106. pyogrio/gdal_data/grib2_table_4_2_local_Canada.csv +5 -0
  107. pyogrio/gdal_data/grib2_table_4_2_local_HPC.csv +2 -0
  108. pyogrio/gdal_data/grib2_table_4_2_local_MRMS.csv +175 -0
  109. pyogrio/gdal_data/grib2_table_4_2_local_NCEP.csv +401 -0
  110. pyogrio/gdal_data/grib2_table_4_2_local_NDFD.csv +38 -0
  111. pyogrio/gdal_data/grib2_table_4_2_local_index.csv +7 -0
  112. pyogrio/gdal_data/grib2_table_4_5.csv +261 -0
  113. pyogrio/gdal_data/grib2_table_versions.csv +3 -0
  114. pyogrio/gdal_data/gt_datum.csv +229 -0
  115. pyogrio/gdal_data/gt_ellips.csv +24 -0
  116. pyogrio/gdal_data/header.dxf +1124 -0
  117. pyogrio/gdal_data/inspire_cp_BasicPropertyUnit.gfs +57 -0
  118. pyogrio/gdal_data/inspire_cp_CadastralBoundary.gfs +60 -0
  119. pyogrio/gdal_data/inspire_cp_CadastralParcel.gfs +81 -0
  120. pyogrio/gdal_data/inspire_cp_CadastralZoning.gfs +161 -0
  121. pyogrio/gdal_data/jpfgdgml_AdmArea.gfs +59 -0
  122. pyogrio/gdal_data/jpfgdgml_AdmBdry.gfs +49 -0
  123. pyogrio/gdal_data/jpfgdgml_AdmPt.gfs +59 -0
  124. pyogrio/gdal_data/jpfgdgml_BldA.gfs +54 -0
  125. pyogrio/gdal_data/jpfgdgml_BldL.gfs +54 -0
  126. pyogrio/gdal_data/jpfgdgml_Cntr.gfs +54 -0
  127. pyogrio/gdal_data/jpfgdgml_CommBdry.gfs +49 -0
  128. pyogrio/gdal_data/jpfgdgml_CommPt.gfs +59 -0
  129. pyogrio/gdal_data/jpfgdgml_Cstline.gfs +54 -0
  130. pyogrio/gdal_data/jpfgdgml_ElevPt.gfs +54 -0
  131. pyogrio/gdal_data/jpfgdgml_GCP.gfs +94 -0
  132. pyogrio/gdal_data/jpfgdgml_LeveeEdge.gfs +49 -0
  133. pyogrio/gdal_data/jpfgdgml_RailCL.gfs +54 -0
  134. pyogrio/gdal_data/jpfgdgml_RdASL.gfs +44 -0
  135. pyogrio/gdal_data/jpfgdgml_RdArea.gfs +54 -0
  136. pyogrio/gdal_data/jpfgdgml_RdCompt.gfs +59 -0
  137. pyogrio/gdal_data/jpfgdgml_RdEdg.gfs +59 -0
  138. pyogrio/gdal_data/jpfgdgml_RdMgtBdry.gfs +49 -0
  139. pyogrio/gdal_data/jpfgdgml_RdSgmtA.gfs +59 -0
  140. pyogrio/gdal_data/jpfgdgml_RvrMgtBdry.gfs +49 -0
  141. pyogrio/gdal_data/jpfgdgml_SBAPt.gfs +49 -0
  142. pyogrio/gdal_data/jpfgdgml_SBArea.gfs +54 -0
  143. pyogrio/gdal_data/jpfgdgml_SBBdry.gfs +44 -0
  144. pyogrio/gdal_data/jpfgdgml_WA.gfs +54 -0
  145. pyogrio/gdal_data/jpfgdgml_WL.gfs +54 -0
  146. pyogrio/gdal_data/jpfgdgml_WStrA.gfs +54 -0
  147. pyogrio/gdal_data/jpfgdgml_WStrL.gfs +54 -0
  148. pyogrio/gdal_data/netcdf_config.xsd +143 -0
  149. pyogrio/gdal_data/nitf_spec.xml +3306 -0
  150. pyogrio/gdal_data/nitf_spec.xsd +189 -0
  151. pyogrio/gdal_data/ogrinfo_output.schema.json +505 -0
  152. pyogrio/gdal_data/ogrvrt.xsd +543 -0
  153. pyogrio/gdal_data/osmconf.ini +132 -0
  154. pyogrio/gdal_data/ozi_datum.csv +131 -0
  155. pyogrio/gdal_data/ozi_ellips.csv +35 -0
  156. pyogrio/gdal_data/pci_datum.txt +463 -0
  157. pyogrio/gdal_data/pci_ellips.txt +77 -0
  158. pyogrio/gdal_data/pdfcomposition.xsd +721 -0
  159. pyogrio/gdal_data/pds4_template.xml +65 -0
  160. pyogrio/gdal_data/plscenesconf.json +1985 -0
  161. pyogrio/gdal_data/ruian_vf_ob_v1.gfs +1455 -0
  162. pyogrio/gdal_data/ruian_vf_st_uvoh_v1.gfs +86 -0
  163. pyogrio/gdal_data/ruian_vf_st_v1.gfs +1489 -0
  164. pyogrio/gdal_data/ruian_vf_v1.gfs +2126 -0
  165. pyogrio/gdal_data/s57agencies.csv +249 -0
  166. pyogrio/gdal_data/s57attributes.csv +484 -0
  167. pyogrio/gdal_data/s57expectedinput.csv +1008 -0
  168. pyogrio/gdal_data/s57objectclasses.csv +287 -0
  169. pyogrio/gdal_data/seed_2d.dgn +0 -0
  170. pyogrio/gdal_data/seed_3d.dgn +0 -0
  171. pyogrio/gdal_data/stateplane.csv +259 -0
  172. pyogrio/gdal_data/template_tiles.mapml +28 -0
  173. pyogrio/gdal_data/tms_LINZAntarticaMapTileGrid.json +190 -0
  174. pyogrio/gdal_data/tms_MapML_APSTILE.json +268 -0
  175. pyogrio/gdal_data/tms_MapML_CBMTILE.json +346 -0
  176. pyogrio/gdal_data/tms_NZTM2000.json +243 -0
  177. pyogrio/gdal_data/trailer.dxf +434 -0
  178. pyogrio/gdal_data/usage +4 -0
  179. pyogrio/gdal_data/vcpkg-cmake-wrapper.cmake +23 -0
  180. pyogrio/gdal_data/vcpkg.spdx.json +264 -0
  181. pyogrio/gdal_data/vcpkg_abi_info.txt +41 -0
  182. pyogrio/gdal_data/vdv452.xml +367 -0
  183. pyogrio/gdal_data/vdv452.xsd +63 -0
  184. pyogrio/gdal_data/vicar.json +164 -0
  185. pyogrio/geopandas.py +675 -0
  186. pyogrio/proj_data/CH +22 -0
  187. pyogrio/proj_data/GL27 +23 -0
  188. pyogrio/proj_data/ITRF2000 +24 -0
  189. pyogrio/proj_data/ITRF2008 +94 -0
  190. pyogrio/proj_data/ITRF2014 +55 -0
  191. pyogrio/proj_data/copyright +34 -0
  192. pyogrio/proj_data/deformation_model.schema.json +582 -0
  193. pyogrio/proj_data/nad.lst +142 -0
  194. pyogrio/proj_data/nad27 +810 -0
  195. pyogrio/proj_data/nad83 +745 -0
  196. pyogrio/proj_data/other.extra +53 -0
  197. pyogrio/proj_data/proj-config-version.cmake +44 -0
  198. pyogrio/proj_data/proj-config.cmake +79 -0
  199. pyogrio/proj_data/proj-targets-release.cmake +19 -0
  200. pyogrio/proj_data/proj-targets.cmake +107 -0
  201. pyogrio/proj_data/proj.db +0 -0
  202. pyogrio/proj_data/proj.ini +51 -0
  203. pyogrio/proj_data/proj4-targets-release.cmake +19 -0
  204. pyogrio/proj_data/proj4-targets.cmake +107 -0
  205. pyogrio/proj_data/projjson.schema.json +1174 -0
  206. pyogrio/proj_data/triangulation.schema.json +214 -0
  207. pyogrio/proj_data/usage +4 -0
  208. pyogrio/proj_data/vcpkg.spdx.json +198 -0
  209. pyogrio/proj_data/vcpkg_abi_info.txt +27 -0
  210. pyogrio/proj_data/world +214 -0
  211. pyogrio/raw.py +871 -0
  212. pyogrio/tests/__init__.py +0 -0
  213. pyogrio/tests/conftest.py +204 -0
  214. pyogrio/tests/fixtures/README.md +89 -0
  215. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.cpg +1 -0
  216. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf +0 -0
  217. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.prj +1 -0
  218. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp +0 -0
  219. pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx +0 -0
  220. pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
  221. pyogrio/tests/fixtures/sample.osm.pbf +0 -0
  222. pyogrio/tests/fixtures/test_datetime.geojson +7 -0
  223. pyogrio/tests/fixtures/test_datetime_tz.geojson +8 -0
  224. pyogrio/tests/fixtures/test_fgdb.gdb.zip +0 -0
  225. pyogrio/tests/fixtures/test_gpkg_nulls.gpkg +0 -0
  226. pyogrio/tests/fixtures/test_multisurface.gpkg +0 -0
  227. pyogrio/tests/fixtures/test_nested.geojson +18 -0
  228. pyogrio/tests/fixtures/test_ogr_types_list.geojson +12 -0
  229. pyogrio/tests/test_arrow.py +1041 -0
  230. pyogrio/tests/test_core.py +588 -0
  231. pyogrio/tests/test_geopandas_io.py +2174 -0
  232. pyogrio/tests/test_path.py +352 -0
  233. pyogrio/tests/test_raw_io.py +1404 -0
  234. pyogrio/util.py +223 -0
  235. pyogrio-0.9.0.dist-info/LICENSE +21 -0
  236. pyogrio-0.9.0.dist-info/METADATA +100 -0
  237. pyogrio-0.9.0.dist-info/RECORD +239 -0
  238. pyogrio-0.9.0.dist-info/WHEEL +5 -0
  239. pyogrio-0.9.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1041 @@
1
+ import contextlib
2
+ from io import BytesIO
3
+ import json
4
+ import math
5
+ import os
6
+ from packaging.version import Version
7
+ import sys
8
+
9
+ import pytest
10
+ import numpy as np
11
+
12
+ import pyogrio
13
+ from pyogrio import (
14
+ __gdal_version__,
15
+ read_dataframe,
16
+ read_info,
17
+ list_layers,
18
+ get_gdal_config_option,
19
+ set_gdal_config_options,
20
+ )
21
+ from pyogrio.raw import open_arrow, read_arrow, write, write_arrow
22
+ from pyogrio.errors import DataSourceError, FieldError, DataLayerError
23
+ from pyogrio.tests.conftest import (
24
+ ALL_EXTS,
25
+ DRIVERS,
26
+ DRIVER_EXT,
27
+ requires_arrow_write_api,
28
+ requires_pyarrow_api,
29
+ )
30
+
31
+ try:
32
+ import pandas as pd
33
+ from pandas.testing import assert_frame_equal, assert_index_equal
34
+ from geopandas.testing import assert_geodataframe_equal
35
+
36
+ import pyarrow
37
+ except ImportError:
38
+ pass
39
+
40
+ # skip all tests in this file if Arrow API or GeoPandas are unavailable
41
+ pytestmark = requires_pyarrow_api
42
+ pytest.importorskip("geopandas")
43
+ pa = pytest.importorskip("pyarrow")
44
+
45
+
46
def test_read_arrow(naturalearth_lowres_all_ext):
    """Reading with ``use_arrow=True`` matches reading with ``use_arrow=False``."""
    via_arrow = read_dataframe(naturalearth_lowres_all_ext, use_arrow=True)
    via_default = read_dataframe(naturalearth_lowres_all_ext, use_arrow=False)

    # files whose suffix starts with ".geojson" are compared less precisely
    less_precise = naturalearth_lowres_all_ext.suffix.startswith(".geojson")
    assert_geodataframe_equal(via_arrow, via_default, check_less_precise=less_precise)
55
+
56
+
57
def test_read_arrow_unspecified_layer_warning(data_dir):
    """Reading a multi-layer file without specifying a layer gives a warning."""
    multi_layer_path = data_dir / "sample.osm.pbf"
    with pytest.warns(UserWarning, match="More than one layer found "):
        read_arrow(multi_layer_path)
61
+
62
+
63
@pytest.mark.parametrize("skip_features, expected", [(10, 167), (200, 0)])
def test_read_arrow_skip_features(naturalearth_lowres, skip_features, expected):
    """``skip_features`` drops that many leading rows from the Arrow table."""
    _meta, table = read_arrow(naturalearth_lowres, skip_features=skip_features)
    assert len(table) == expected
67
+
68
+
69
def test_read_arrow_negative_skip_features(naturalearth_lowres):
    """A negative ``skip_features`` is rejected with a ``ValueError``."""
    expected_message = "'skip_features' must be >= 0"
    with pytest.raises(ValueError, match=expected_message):
        read_arrow(naturalearth_lowres, skip_features=-1)
72
+
73
+
74
@pytest.mark.parametrize(
    "max_features, expected", [(0, 0), (10, 10), (200, 177), (100000, 177)]
)
def test_read_arrow_max_features(naturalearth_lowres, max_features, expected):
    """``max_features`` caps the number of rows in the returned Arrow table."""
    _meta, table = read_arrow(naturalearth_lowres, max_features=max_features)
    assert len(table) == expected
80
+
81
+
82
def test_read_arrow_negative_max_features(naturalearth_lowres):
    """A negative ``max_features`` is rejected with a ``ValueError``."""
    expected_message = "'max_features' must be >= 0"
    with pytest.raises(ValueError, match=expected_message):
        read_arrow(naturalearth_lowres, max_features=-1)
85
+
86
+
87
@pytest.mark.parametrize(
    "skip_features, max_features, expected",
    [
        (0, 0, 0),
        (10, 0, 0),
        (200, 0, 0),
        (1, 200, 176),
        (176, 10, 1),
        (100, 100, 77),
        (100, 100000, 77),
    ],
)
def test_read_arrow_skip_features_max_features(
    naturalearth_lowres, skip_features, max_features, expected
):
    """Combining ``skip_features`` and ``max_features`` gives the expected row count."""
    read_options = {"skip_features": skip_features, "max_features": max_features}
    _meta, table = read_arrow(naturalearth_lowres, **read_options)
    assert len(table) == expected
106
+
107
+
108
def test_read_arrow_fid(naturalearth_lowres_all_ext):
    """``fid_as_index`` controls whether the FID becomes the DataFrame index."""
    fid_filter = "fid >= 2 AND fid <= 3"

    # without fid_as_index the two selected rows get a default RangeIndex
    plain = read_dataframe(
        naturalearth_lowres_all_ext,
        fid_as_index=False,
        use_arrow=True,
        where=fid_filter,
    )
    assert_index_equal(plain.index, pd.RangeIndex(0, 2))

    # with fid_as_index the index carries the FIDs and is named "fid"
    indexed = read_dataframe(
        naturalearth_lowres_all_ext,
        fid_as_index=True,
        use_arrow=True,
        where=fid_filter,
    )
    assert_index_equal(indexed.index, pd.Index([2, 3], name="fid"))
116
+
117
+
118
def test_read_arrow_columns(naturalearth_lowres):
    """``columns`` restricts the result to the requested fields plus geometry."""
    subset = read_dataframe(naturalearth_lowres, use_arrow=True, columns=["continent"])
    assert list(subset.columns) == ["continent", "geometry"]
121
+
122
+
123
def test_read_arrow_ignore_geometry(naturalearth_lowres):
    """``read_geometry=False`` returns a plain DataFrame without a geometry column."""
    without_geometry = read_dataframe(
        naturalearth_lowres, use_arrow=True, read_geometry=False
    )
    # exact type check on purpose: subclasses of DataFrame must not pass
    assert type(without_geometry) is pd.DataFrame

    with_geometry = read_dataframe(naturalearth_lowres, use_arrow=True)
    assert_frame_equal(without_geometry, with_geometry.drop(columns=["geometry"]))
131
+
132
+
133
def test_read_arrow_nested_types(test_ogr_types_list):
    """List-typed fields are available when reading through Arrow."""
    frame = read_dataframe(test_ogr_types_list, use_arrow=True)
    assert "list_int64" in frame.columns

    first_value = frame["list_int64"][0]
    assert first_value.tolist() == [0, 1]
138
+
139
+
140
def test_read_arrow_to_pandas_kwargs(test_fgdb_vsi):
    """``arrow_to_pandas_kwargs`` is honored when building the DataFrame."""
    # with strings_to_categorical=True the string column should be categorical
    frame = read_dataframe(
        test_fgdb_vsi,
        layer="basetable_2",
        use_arrow=True,
        arrow_to_pandas_kwargs={"strings_to_categorical": True},
    )
    assert "SEGMENT_NAME" in frame.columns
    assert frame["SEGMENT_NAME"].dtype.name == "category"
151
+
152
+
153
def test_read_arrow_raw(naturalearth_lowres):
    """``read_arrow`` returns a ``(dict, pyarrow.Table)`` pair."""
    result = read_arrow(naturalearth_lowres)
    meta, table = result
    assert isinstance(meta, dict)
    assert isinstance(table, pyarrow.Table)
157
+
158
+
159
def test_read_arrow_vsi(naturalearth_lowres_vsi):
    """All 177 rows are read from the second element of the VSI fixture."""
    vsi_path = naturalearth_lowres_vsi[1]
    _meta, table = read_arrow(vsi_path)
    assert len(table) == 177
162
+
163
+
164
def test_read_arrow_bytes(geojson_bytes):
    """``read_arrow`` accepts the ``geojson_bytes`` fixture as input."""
    meta, table = read_arrow(geojson_bytes)

    field_names = meta["fields"]
    assert field_names.shape == (5,)
    assert len(table) == 3
169
+
170
+
171
def test_read_arrow_filelike(geojson_filelike):
    """``read_arrow`` accepts the ``geojson_filelike`` fixture as input."""
    meta, table = read_arrow(geojson_filelike)

    field_names = meta["fields"]
    assert field_names.shape == (5,)
    assert len(table) == 3
176
+
177
+
178
def test_open_arrow_pyarrow(naturalearth_lowres):
    """``use_pyarrow=True`` yields a ``pyarrow.RecordBatchReader``."""
    stream = open_arrow(naturalearth_lowres, use_pyarrow=True)
    with stream as (meta, reader):
        assert isinstance(meta, dict)
        assert isinstance(reader, pyarrow.RecordBatchReader)
        # consume the reader while the stream is still open
        whole_table = reader.read_all()
        assert isinstance(whole_table, pyarrow.Table)
183
+
184
+
185
def test_open_arrow_batch_size(naturalearth_lowres):
    """``batch_size`` determines how many rows each streamed batch holds."""
    _, full_table = read_arrow(naturalearth_lowres)
    # half the row count (rounded up) should split the data into two batches
    batch_size = math.ceil(len(full_table) / 2)

    stream = open_arrow(naturalearth_lowres, batch_size=batch_size, use_pyarrow=True)
    with stream as (meta, reader):
        assert isinstance(meta, dict)
        assert isinstance(reader, pyarrow.RecordBatchReader)
        batches = list(reader)

        assert len(batches) == 2, "Should be two batches given the batch_size parameter"
        assert len(batches[0]) == batch_size, "First table should match the batch size"
203
+
204
+
205
@pytest.mark.skipif(
    __gdal_version__ >= (3, 8, 0),
    reason="skip_features supported by Arrow stream API for GDAL>=3.8.0",
)
@pytest.mark.parametrize("skip_features", [10, 200])
def test_open_arrow_skip_features_unsupported(naturalearth_lowres, skip_features):
    """skip_features are not supported for the Arrow stream interface for
    GDAL < 3.8.0"""
    expected_message = (
        "specifying 'skip_features' is not supported for Arrow for GDAL<3.8.0"
    )
    with pytest.raises(ValueError, match=expected_message):
        stream = open_arrow(naturalearth_lowres, skip_features=skip_features)
        with stream as (_meta, _reader):
            pass
222
+
223
+
224
@pytest.mark.parametrize("max_features", [10, 200])
def test_open_arrow_max_features_unsupported(naturalearth_lowres, max_features):
    """max_features are not supported for the Arrow stream interface"""
    expected_message = "specifying 'max_features' is not supported for Arrow"
    with pytest.raises(ValueError, match=expected_message):
        stream = open_arrow(naturalearth_lowres, max_features=max_features)
        with stream as (_meta, _reader):
            pass
236
+
237
+
238
@pytest.mark.skipif(
    __gdal_version__ < (3, 8, 0),
    reason="returns geoarrow metadata only for GDAL>=3.8.0",
)
def test_read_arrow_geoarrow_metadata(naturalearth_lowres):
    """The geometry field carries geoarrow.wkb extension metadata with an EPSG CRS."""
    _meta, table = read_arrow(naturalearth_lowres)
    geometry_metadata = table.schema.field("wkb_geometry").metadata
    assert geometry_metadata[b"ARROW:extension:name"] == b"geoarrow.wkb"

    # the extension metadata is JSON; the CRS identifier lives under crs -> id
    extension_metadata = json.loads(geometry_metadata[b"ARROW:extension:metadata"])
    assert extension_metadata["crs"]["id"]["authority"] == "EPSG"
    assert extension_metadata["crs"]["id"]["code"] == 4326
249
+
250
+
251
def test_open_arrow_capsule_protocol(naturalearth_lowres):
    """The default stream object can be consumed directly by ``pyarrow.table``."""
    pytest.importorskip("pyarrow", minversion="14")

    with open_arrow(naturalearth_lowres) as (meta, reader):
        assert isinstance(meta, dict)
        assert isinstance(reader, pyogrio._io._ArrowStream)

        # materialize the table while the stream is still open
        from_stream = pyarrow.table(reader)

    _, reference = read_arrow(naturalearth_lowres)
    assert from_stream.equals(reference)
262
+
263
+
264
def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres):
    """``open_arrow`` still works while ``pyarrow`` cannot be imported."""
    pa_module = pytest.importorskip("pyarrow", minversion="14")

    # Make PyArrow temporarily unavailable (importing will fail)
    sys.modules["pyarrow"] = None
    try:
        with open_arrow(naturalearth_lowres) as (meta, reader):
            assert isinstance(meta, dict)
            assert isinstance(reader, pyogrio._io._ArrowStream)
            # use the module object captured before the import was blocked
            from_stream = pa_module.table(reader)
    finally:
        # always put the real module back so later tests can import it
        sys.modules["pyarrow"] = pa_module

    _, reference = read_arrow(naturalearth_lowres)
    assert from_stream.equals(reference)
279
+
280
+
281
@contextlib.contextmanager
def use_arrow_context():
    """Temporarily set the ``PYOGRIO_USE_ARROW`` environment variable to ``"1"``.

    The previous state of the variable (its value, or its absence) is restored
    when the ``with`` block exits, including when the block raises.
    """
    original = os.environ.get("PYOGRIO_USE_ARROW")
    os.environ["PYOGRIO_USE_ARROW"] = "1"
    try:
        yield
    finally:
        if original is None:
            # pop() rather than del: the body may already have removed the variable
            os.environ.pop("PYOGRIO_USE_ARROW", None)
        else:
            # restore any prior value, including an empty string
            os.environ["PYOGRIO_USE_ARROW"] = original
290
+
291
+
292
def test_enable_with_environment_variable(test_ogr_types_list):
    """Arrow reading can be switched on through the environment variable."""
    # list types are only supported with arrow, so don't work by default and work
    # when arrow is enabled through env variable
    default_frame = read_dataframe(test_ogr_types_list)
    assert "list_int64" not in default_frame.columns

    with use_arrow_context():
        arrow_frame = read_dataframe(test_ogr_types_list)
    assert "list_int64" in arrow_frame.columns
301
+
302
+
303
@pytest.mark.skipif(
    __gdal_version__ < (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
)
@pytest.mark.parametrize("ext", ALL_EXTS)
def test_arrow_bool_roundtrip(tmp_path, ext):
    """Boolean values written to disk are read back unchanged through Arrow."""
    filename = tmp_path / f"test{ext}"

    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 5, dtype=object
    )
    bool_col = np.array([True, False, True, False, True])
    field_data = [bool_col]
    fields = ["bool_col"]

    kwargs = {}

    if ext == ".fgb":
        # For .fgb, spatial_index=False to avoid the rows being reordered
        kwargs["spatial_index"] = False

    # Write exactly once: a second write without **kwargs would overwrite the
    # file and silently drop the spatial_index=False option set above.
    write(
        filename,
        geometry,
        field_data,
        fields,
        geometry_type="Point",
        crs="EPSG:4326",
        **kwargs,
    )

    table = read_arrow(filename)[1]

    assert np.array_equal(table["bool_col"].to_numpy(), bool_col)
340
+
341
+
342
@pytest.mark.skipif(
    __gdal_version__ >= (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
)
@pytest.mark.parametrize("ext", ALL_EXTS)
def test_arrow_bool_exception(tmp_path, ext):
    """Reading bool columns via Arrow raises for GPKG / FGB on GDAL < 3.8.3."""
    filename = tmp_path / f"test{ext}"

    # Point(0, 0)
    point_wkb = bytes.fromhex("010100000000000000000000000000000000000000")
    geometry = np.array([point_wkb] * 5, dtype=object)
    bool_col = np.array([True, False, True, False, True])

    write(
        filename,
        geometry,
        [bool_col],
        ["bool_col"],
        geometry_type="Point",
        crs="EPSG:4326",
    )

    if ext not in {".fgb", ".gpkg"}:
        # other formats open without an exception
        with open_arrow(filename):
            pass
        return

    # only raise exception for GPKG / FGB
    with pytest.raises(
        RuntimeError,
        match="GDAL < 3.8.3 does not correctly read boolean data values using "
        "the Arrow API",
    ):
        with open_arrow(filename):
            pass

    # do not raise exception if no bool columns are read
    with open_arrow(filename, columns=[]):
        pass
378
+
379
+
380
# Three copies of the WKB encoding of Point(0, 0), stored as an object array
points = np.array(
    [bytes.fromhex("010100000000000000000000000000000000000000") for _ in range(3)],
    dtype=object,
)
385
+
386
+
387
@requires_arrow_write_api
def test_write_shp(tmp_path, naturalearth_lowres):
    """Writing an Arrow table to a shapefile creates the .shp, .dbf and .prj files."""
    meta, table = read_arrow(naturalearth_lowres)

    shp_path = tmp_path / "test.shp"
    write_options = {
        "crs": meta["crs"],
        "encoding": meta["encoding"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }
    write_arrow(table, shp_path, **write_options)

    assert shp_path.exists()
    # the sidecar files must exist next to the .shp
    for sidecar_suffix in (".dbf", ".prj"):
        sidecar = shp_path.with_suffix(sidecar_suffix)
        assert sidecar.exists()
404
+
405
+
406
@pytest.mark.filterwarnings("ignore:A geometry of type POLYGON is inserted")
@requires_arrow_write_api
def test_write_gpkg(tmp_path, naturalearth_lowres):
    """Writing an Arrow table with the GPKG driver creates the output file."""
    meta, table = read_arrow(naturalearth_lowres)

    gpkg_path = tmp_path / "test.gpkg"
    write_options = {
        "driver": "GPKG",
        "crs": meta["crs"],
        "geometry_type": "MultiPolygon",
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }
    write_arrow(table, gpkg_path, **write_options)

    assert gpkg_path.exists()
422
+
423
+
424
@pytest.mark.filterwarnings("ignore:A geometry of type POLYGON is inserted")
@requires_arrow_write_api
def test_write_gpkg_multiple_layers(tmp_path, naturalearth_lowres):
    """Two successive writes with different layer names produce two GPKG layers."""
    meta, table = read_arrow(naturalearth_lowres)
    meta["geometry_type"] = "MultiPolygon"

    gpkg_path = tmp_path / "test.gpkg"
    # options shared by both writes; only the layer name differs
    shared_options = {
        "driver": "GPKG",
        "crs": meta["crs"],
        "geometry_type": "MultiPolygon",
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }

    write_arrow(table, gpkg_path, layer="first", **shared_options)

    assert gpkg_path.exists()

    assert np.array_equal(list_layers(gpkg_path), [["first", "MultiPolygon"]])

    write_arrow(table, gpkg_path, layer="second", **shared_options)

    expected_layers = [["first", "MultiPolygon"], ["second", "MultiPolygon"]]
    assert np.array_equal(list_layers(gpkg_path), expected_layers)
458
+
459
+
460
@requires_arrow_write_api
def test_write_geojson(tmp_path, naturalearth_lowres):
    """Writing an Arrow table as GeoJSON produces a complete FeatureCollection."""
    meta, table = read_arrow(naturalearth_lowres)
    filename = tmp_path / "test.json"
    write_arrow(
        table,
        filename,
        driver="GeoJSON",
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )

    assert filename.exists()

    # Close the handle deterministically and decode as UTF-8 (the encoding
    # JSON text is expected to use) instead of the platform default encoding.
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)

    assert data["type"] == "FeatureCollection"
    assert data["name"] == "test"
    assert "crs" in data
    assert len(data["features"]) == len(table)
    # every field reported in the metadata must appear in the feature properties
    missing_fields = set(meta["fields"]).difference(
        data["features"][0]["properties"].keys()
    )
    assert not missing_fields
484
+
485
+
486
@pytest.mark.parametrize(
    "driver",
    # sorted() gives a deterministic parameter order; iterating a set of strings
    # directly varies between interpreter runs because of hash randomization,
    # which makes test ordering unstable and breaks pytest-xdist collection.
    sorted(
        {
            driver
            for driver in DRIVERS.values()
            if driver not in ("ESRI Shapefile", "GPKG", "GeoJSON")
        }
    ),
)
@requires_arrow_write_api
def test_write_supported(tmp_path, naturalearth_lowres, driver):
    """Test drivers known to work that are not specifically tested above"""
    meta, table = read_arrow(naturalearth_lowres, columns=["iso_a3"], max_features=1)

    # note: naturalearth_lowres contains mixed polygons / multipolygons, which
    # are not supported in mixed form for all drivers. To get around this here
    # we take the first record only.
    meta["geometry_type"] = "MultiPolygon"

    filename = tmp_path / f"test{DRIVER_EXT[driver]}"
    write_arrow(
        table,
        filename,
        driver=driver,
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )
    assert filename.exists()
514
+
515
+
516
@requires_arrow_write_api
def test_write_unsupported(tmp_path, naturalearth_lowres):
    """Writing with the ESRIJSON driver raises ``DataSourceError``."""
    meta, table = read_arrow(naturalearth_lowres)
    target = tmp_path / "test.json"
    write_options = {
        "driver": "ESRIJSON",
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }

    with pytest.raises(DataSourceError, match="does not support write functionality"):
        write_arrow(table, target, **write_options)
529
+
530
+
531
@pytest.mark.parametrize("ext", DRIVERS)
@requires_arrow_write_api
def test_write_append(request, tmp_path, naturalearth_lowres, ext):
    """Appending the same table to an existing file doubles its feature count."""
    if ext.startswith(".geojson"):
        # Bug in GDAL when appending int64 to GeoJSON
        # (https://github.com/OSGeo/gdal/issues/9792)
        geojson_xfail = pytest.mark.xfail(
            reason="Bugs with append when writing Arrow to GeoJSON"
        )
        request.node.add_marker(geojson_xfail)

    meta, table = read_arrow(naturalearth_lowres)

    # coerce output layer to generic Geometry to avoid mixed type errors
    meta["geometry_type"] = "Unknown"

    # options shared by the initial write and the append
    shared_options = {
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }

    target = tmp_path / f"test{ext}"
    write_arrow(table, target, **shared_options)
    assert target.exists()
    assert read_info(target)["features"] == 177

    # write the same records again
    write_arrow(table, target, append=True, **shared_options)
    assert read_info(target)["features"] == 354
567
+
568
+
569
@pytest.mark.parametrize("driver,ext", [("GML", ".gml"), ("GeoJSONSeq", ".geojsons")])
@requires_arrow_write_api
def test_write_append_unsupported(tmp_path, naturalearth_lowres, driver, ext):
    """Appending to a GML file raises ``DataSourceError``."""
    # NOTE(review): the `driver` and `ext` parameters are never used below; both
    # parametrized cases write GML to "test.gml". Confirm whether the GeoJSONSeq
    # case was meant to be exercised before changing this.
    meta, table = read_arrow(naturalearth_lowres)

    # GML does not support append functionality
    gml_path = tmp_path / "test.gml"
    shared_options = {
        "driver": "GML",
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }

    write_arrow(table, gml_path, **shared_options)
    assert gml_path.exists()
    assert read_info(gml_path, force_feature_count=True)["features"] == 177

    with pytest.raises(DataSourceError):
        write_arrow(table, gml_path, append=True, **shared_options)
597
+
598
+
599
@requires_arrow_write_api
def test_write_gdalclose_error(naturalearth_lowres):
    """Writing to an S3 path in a non-existing bucket raises ``DataSourceError``."""
    meta, table = read_arrow(naturalearth_lowres)

    unreachable_target = "s3://non-existing-bucket/test.geojson"

    # set config options to avoid errors on open due to GDAL S3 configuration
    s3_options = {
        "AWS_ACCESS_KEY_ID": "invalid",
        "AWS_SECRET_ACCESS_KEY": "invalid",
        "AWS_NO_SIGN_REQUEST": True,
    }
    set_gdal_config_options(s3_options)

    write_options = {
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }
    with pytest.raises(DataSourceError, match="Failed to write features to dataset"):
        write_arrow(table, unreachable_target, **write_options)
622
+
623
+
624
@requires_arrow_write_api
@pytest.mark.parametrize("name", ["geoarrow.wkb", "ogc.wkb"])
def test_write_geometry_extension_type(tmp_path, naturalearth_lowres, name):
    """Round-trip a table whose geometry column is identified by extension name.

    No ``geometry_name`` is passed to ``write_arrow``; the geometry column is
    expected to be inferred from the ``ARROW:extension:name`` field metadata.
    """
    meta, table = read_arrow(naturalearth_lowres)

    # Tag the geometry field with the parametrized extension type name.
    schema = table.schema
    geom_pos = schema.get_field_index("wkb_geometry")
    tagged_field = schema.field(geom_pos).with_metadata(
        {"ARROW:extension:name": name}
    )
    tagged_table = table.cast(schema.set(geom_pos, tagged_field))

    out_path = tmp_path / "test_geoarrow.shp"
    write_arrow(
        tagged_table,
        out_path,
        geometry_type=meta["geometry_type"],
        crs=meta["crs"],
    )

    _, roundtripped = read_arrow(out_path)
    assert roundtripped.equals(table)
645
+
646
+
647
@requires_arrow_write_api
def test_write_unsupported_geoarrow(tmp_path, naturalearth_lowres):
    """A geometry column tagged as ``geoarrow.point`` raises NotImplementedError."""
    meta, table = read_arrow(naturalearth_lowres)

    # The extension name used here does not match the column type the way it
    # would for correct geoarrow data, but the writing code checks the name.
    schema = table.schema
    geom_pos = schema.get_field_index("wkb_geometry")
    point_field = schema.field(geom_pos).with_metadata(
        {"ARROW:extension:name": "geoarrow.point"}
    )
    mislabeled = table.cast(schema.set(geom_pos, point_field))

    expected_msg = (
        "Writing a geometry column of type geoarrow.point is not yet supported"
    )
    with pytest.raises(NotImplementedError, match=expected_msg):
        write_arrow(
            mislabeled,
            tmp_path / "test_geoarrow.shp",
            geometry_type=meta["geometry_type"],
            crs=meta["crs"],
        )
669
+
670
+
671
@requires_arrow_write_api
def test_write_no_geom(tmp_path, naturalearth_lowres):
    """Write a table without a geometry column and read it back unchanged."""
    _, source = read_arrow(naturalearth_lowres)
    attributes_only = source.drop_columns("wkb_geometry")

    out_path = tmp_path / "test.gpkg"
    write_arrow(attributes_only, out_path)

    assert out_path.exists()

    # The written layer reports neither a CRS nor a geometry type, and the
    # attribute data is identical to what was written.
    written_meta, written = read_arrow(out_path)
    assert written_meta["crs"] is None
    assert written_meta["geometry_type"] is None
    assert attributes_only.equals(written)
685
+
686
+
687
@requires_arrow_write_api
def test_write_geometry_type(tmp_path, naturalearth_lowres):
    """``geometry_type`` is mandatory; "Unknown" yields a generic layer."""
    meta, table = read_arrow(naturalearth_lowres)
    crs = meta["crs"]
    geometry_name = meta["geometry_name"] or "wkb_geometry"

    # Leaving out the geometry type is currently an error.
    with pytest.raises(ValueError, match="'geometry_type' keyword is required"):
        write_arrow(
            table,
            tmp_path / "test.shp",
            crs=crs,
            geometry_name=geometry_name,
        )

    # Passing "Unknown" is accepted and is reported back as the layer type.
    generic_path = tmp_path / "test.gpkg"
    write_arrow(
        table,
        generic_path,
        crs=crs,
        geometry_type="Unknown",
        geometry_name=geometry_name,
    )
    assert generic_path.exists()

    meta_written, _ = read_arrow(generic_path)
    assert meta_written["geometry_type"] == "Unknown"
712
+
713
+
714
@requires_arrow_write_api
def test_write_raise_promote_to_multi(tmp_path, naturalearth_lowres):
    """Passing ``promote_to_multi=True`` to ``write_arrow`` raises ValueError."""
    meta, table = read_arrow(naturalearth_lowres)

    write_kwargs = dict(
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
        promote_to_multi=True,
    )

    expected_msg = "The 'promote_to_multi' option is not supported"
    with pytest.raises(ValueError, match=expected_msg):
        write_arrow(table, tmp_path / "test.shp", **write_kwargs)
729
+
730
+
731
@requires_arrow_write_api
def test_write_no_crs(tmp_path, naturalearth_lowres):
    """Omitting ``crs`` emits a UserWarning but the data is still written."""
    meta, table = read_arrow(naturalearth_lowres)
    out_path = tmp_path / "test.shp"
    geometry_name = meta["geometry_name"] or "wkb_geometry"

    with pytest.warns(UserWarning, match="'crs' was not provided"):
        write_arrow(
            table,
            out_path,
            geometry_name=geometry_name,
            geometry_type=meta["geometry_type"],
        )

    # Besides the warning, the content round-trips and no CRS is stored.
    meta_result, result = read_arrow(out_path)
    assert table.equals(result)
    assert meta_result["crs"] is None
747
+
748
+
749
@requires_arrow_write_api
def test_write_non_arrow_data(tmp_path):
    """A NumPy array is rejected by ``write_arrow`` with a ValueError."""
    not_arrow = np.array([1, 2, 3])
    expected_msg = "The provided data is not recognized as Arrow data"

    with pytest.raises(ValueError, match=expected_msg):
        write_arrow(
            not_arrow,
            tmp_path / "test_no_arrow_data.shp",
            geometry_name="geometry",
            geometry_type="Point",
            crs="EPSG:4326",
        )
762
+
763
+
764
@pytest.mark.skipif(
    Version(pa.__version__) < Version("16.0.0.dev0"),
    reason="PyCapsule protocol only added to pyarrow.ChunkedArray in pyarrow 16",
)
@requires_arrow_write_api
def test_write_non_arrow_tabular_data(tmp_path):
    """Arrow data that is not tabular (a ChunkedArray) raises DataLayerError."""
    chunked = pa.chunked_array([[1, 2, 3], [4, 5, 6]])
    expected_msg = ".*should be called on a schema that is a struct of fields"

    with pytest.raises(DataLayerError, match=expected_msg):
        write_arrow(
            chunked,
            tmp_path / "test_no_arrow_tabular_data.shp",
            geometry_name="geometry",
            geometry_type="Point",
            crs="EPSG:4326",
        )
782
+
783
+
784
@pytest.mark.filterwarnings("ignore:.*not handled natively:RuntimeWarning")
@requires_arrow_write_api
def test_write_batch_error_message(tmp_path):
    """An error raised by GDAL while writing surfaces as DataLayerError.

    The message is expected to mention "invalid dictionary index".
    """
    # Dictionary array with indices 0..2 but only two dictionary values: the
    # schema itself is fine, the data only fails once it is written.
    dictionary_type = pa.dictionary(pa.int64(), pa.string())
    index_buffers = pa.array([0, 1, 2]).buffers()
    broken_column = pa.DictionaryArray.from_buffers(
        dictionary_type,
        length=3,
        buffers=index_buffers,
        dictionary=pa.array(["a", "b"]),
    )
    table = pa.table({"geometry": points, "col": broken_column})

    out_path = tmp_path / "test_unsupported_list_type.fgb"
    with pytest.raises(DataLayerError, match=".*invalid dictionary index"):
        write_arrow(
            table,
            out_path,
            geometry_name="geometry",
            geometry_type="Point",
            crs="EPSG:4326",
        )
808
+
809
+
810
@requires_arrow_write_api
def test_write_schema_error_message(tmp_path):
    """An error raised by GDAL while creating fields surfaces as FieldError.

    The column used is a list of map of integer->integer, which is not
    supported by GDAL; the message is expected to contain "not supported".
    """
    map_list_type = pa.list_(pa.map_(pa.int64(), pa.int64()))
    unsupported_column = pa.array(
        [[[(1, 2), (3, 4)], None, [(5, 6)]]] * 3,
        map_list_type,
    )
    table = pa.table({"geometry": points, "col": unsupported_column})

    out_path = tmp_path / "test_unsupported_map_type.shp"
    with pytest.raises(FieldError, match=".*not supported"):
        write_arrow(
            table,
            out_path,
            geometry_name="geometry",
            geometry_type="Point",
            crs="EPSG:4326",
        )
833
+
834
+
835
@requires_arrow_write_api
@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory(naturalearth_lowres, driver):
    """Write one feature into a BytesIO object and read it back from there."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)
    # The output layer is declared as MultiPolygon instead of the type
    # reported by the source.
    meta["geometry_type"] = "MultiPolygon"

    write_kwargs = dict(
        driver=driver,
        layer="test",
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )

    buffer = BytesIO()
    write_arrow(table, buffer, **write_kwargs)

    # Something was written, under the requested layer name.
    assert len(buffer.getbuffer()) > 0
    first_layer = list_layers(buffer)[0]
    assert first_layer[0] == "test"

    # Same number of rows and same field names as the input.
    actual_meta, actual_table = read_arrow(buffer)
    assert len(actual_table) == len(table)
    assert np.array_equal(actual_meta["fields"], meta["fields"])
859
+
860
+
861
@requires_arrow_write_api
def test_write_memory_driver_required(naturalearth_lowres):
    """Writing to a BytesIO object with ``driver=None`` raises ValueError."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)

    write_kwargs = dict(
        driver=None,
        layer="test",
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )

    buffer = BytesIO()
    expected_msg = "driver must be provided to write to in-memory file"
    with pytest.raises(ValueError, match=expected_msg):
        write_arrow(table, buffer, **write_kwargs)
879
+
880
+
881
@requires_arrow_write_api
@pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
    """Drivers that cannot write to an in-memory file raise ValueError."""
    gdal_too_old = __gdal_version__ < (3, 6, 0)
    if driver == "OpenFileGDB" and gdal_too_old:
        pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")

    meta, table = read_arrow(naturalearth_lowres, max_features=1)

    write_kwargs = dict(
        driver=driver,
        layer="test",
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )

    buffer = BytesIO()
    expected_msg = f"writing to in-memory file is not supported for {driver}"
    with pytest.raises(ValueError, match=expected_msg):
        write_arrow(table, buffer, **write_kwargs)
903
+
904
+
905
@requires_arrow_write_api
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory_append_unsupported(naturalearth_lowres, driver):
    """``append=True`` with a BytesIO target raises NotImplementedError."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)
    meta["geometry_type"] = "MultiPolygon"

    write_kwargs = dict(
        driver=driver,
        layer="test",
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
        append=True,
    )

    buffer = BytesIO()
    expected_msg = "append is not supported for in-memory files"
    with pytest.raises(NotImplementedError, match=expected_msg):
        write_arrow(table, buffer, **write_kwargs)
925
+
926
+
927
@requires_arrow_write_api
def test_write_memory_existing_unsupported(naturalearth_lowres):
    """Writing into a BytesIO that already holds bytes raises NotImplementedError."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)
    meta["geometry_type"] = "MultiPolygon"

    write_kwargs = dict(
        driver="GeoJSON",
        layer="test",
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )

    # Buffer is pre-populated, i.e. it represents an existing in-memory object.
    buffer = BytesIO(b"0000")
    expected_msg = "writing to existing in-memory object is not supported"
    with pytest.raises(NotImplementedError, match=expected_msg):
        write_arrow(table, buffer, **write_kwargs)
946
+
947
+
948
@requires_arrow_write_api
def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text):
    """Round-trip a non-UTF-8 encoded field name and value through a shapefile.

    Covers reading with the .cpg sidecar present, reading after it has been
    removed (with and without a user-provided encoding), and checks that a
    globally set SHAPE_ENCODING option survives a read with explicit encoding.
    """
    encoding, text = encoded_text

    # WKB for Point(0, 0)
    origin_wkb = bytes.fromhex("010100000000000000000000000000000000000000")
    source_table = pa.table(
        {
            "geometry": pa.array([origin_wkb]),
            text: pa.array([text]),
        }
    )

    shp_path = tmp_path / "test.shp"
    write_arrow(
        source_table,
        shp_path,
        crs="EPSG:4326",
        geometry_name="geometry",
        geometry_type="Point",
        encoding=encoding,
    )

    # NOTE: GDAL automatically creates a cpg file holding the encoding name,
    # so a read without an explicit encoding picks up the correct one.
    read_meta, read_table = read_arrow(shp_path)
    assert read_meta["fields"][0] == text
    assert read_table[text][0].as_py() == text

    # Without the cpg file the user-provided encoding has to be used.
    shp_path.with_suffix(".cpg").unlink()

    # ISO-8859-1 gets assumed, which is wrong.
    miscoded = text.encode(encoding).decode("ISO-8859-1")
    miscoded_meta = read_arrow(shp_path)[0]
    assert miscoded_meta["fields"][0] == miscoded
    # table cannot be decoded to UTF-8 without UnicodeDecodeErrors

    # With the encoding given explicitly the text comes back correctly.
    read_meta, read_table = read_arrow(shp_path, encoding=encoding)
    assert read_meta["fields"][0] == text
    assert read_table[text][0].as_py() == text

    # A globally set SHAPE_ENCODING must not be corrupted by passing encoding
    # (it is ignored during read when the user specifies an encoding).
    try:
        set_gdal_config_options({"SHAPE_ENCODING": "CP1254"})
        _ = read_arrow(shp_path, encoding=encoding)
        assert get_gdal_config_option("SHAPE_ENCODING") == "CP1254"

    finally:
        # reset to clear between tests
        set_gdal_config_options({"SHAPE_ENCODING": None})
1003
+
1004
+
1005
@requires_arrow_write_api
def test_encoding_write_layer_option_collision_shapefile(tmp_path, naturalearth_lowres):
    """Combining ``encoding`` with an "ENCODING" layer option raises ValueError.

    This holds even when the layer creation option is an empty string.
    """
    meta, table = read_arrow(naturalearth_lowres)

    write_kwargs = dict(
        crs=meta["crs"],
        geometry_type="MultiPolygon",
        geometry_name=meta["geometry_name"] or "wkb_geometry",
        encoding="CP936",
        layer_options={"ENCODING": ""},
    )

    expected_msg = (
        'cannot provide both encoding parameter and "ENCODING" layer creation option'
    )
    with pytest.raises(ValueError, match=expected_msg):
        write_arrow(table, tmp_path / "test.shp", **write_kwargs)
1024
+
1025
+
1026
@requires_arrow_write_api
@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
def test_non_utf8_encoding_io_arrow_exception(tmp_path, naturalearth_lowres, ext):
    """A non-UTF-8 ``encoding`` for these file types raises ValueError."""
    meta, table = read_arrow(naturalearth_lowres)

    write_kwargs = dict(
        crs=meta["crs"],
        geometry_type="MultiPolygon",
        geometry_name=meta["geometry_name"] or "wkb_geometry",
        encoding="CP936",
    )

    expected_msg = "non-UTF-8 encoding is not supported for Arrow"
    with pytest.raises(ValueError, match=expected_msg):
        write_arrow(table, tmp_path / f"test.{ext}", **write_kwargs)