pyogrio 0.9.0-cp39-cp39-manylinux_2_28_aarch64.whl → 0.11.0-cp39-cp39-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (86):
  1. pyogrio/__init__.py +28 -21
  2. pyogrio/_compat.py +15 -1
  3. pyogrio/_env.py +4 -6
  4. pyogrio/_err.cpython-39-aarch64-linux-gnu.so +0 -0
  5. pyogrio/_geometry.cpython-39-aarch64-linux-gnu.so +0 -0
  6. pyogrio/_io.cpython-39-aarch64-linux-gnu.so +0 -0
  7. pyogrio/_ogr.cpython-39-aarch64-linux-gnu.so +0 -0
  8. pyogrio/_version.py +3 -3
  9. pyogrio/_vsi.cpython-39-aarch64-linux-gnu.so +0 -0
  10. pyogrio/core.py +86 -20
  11. pyogrio/errors.py +9 -16
  12. pyogrio/gdal_data/GDAL-targets-release.cmake +3 -3
  13. pyogrio/gdal_data/GDAL-targets.cmake +2 -2
  14. pyogrio/gdal_data/GDALConfig.cmake +0 -1
  15. pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
  16. pyogrio/gdal_data/MM_m_idofic.csv +321 -0
  17. pyogrio/gdal_data/gdalinfo_output.schema.json +3 -3
  18. pyogrio/gdal_data/gdaltileindex.xsd +253 -0
  19. pyogrio/gdal_data/gdalvrt.xsd +178 -63
  20. pyogrio/gdal_data/nitf_spec.xml +1 -17
  21. pyogrio/gdal_data/nitf_spec.xsd +1 -17
  22. pyogrio/gdal_data/ogrinfo_output.schema.json +23 -0
  23. pyogrio/gdal_data/ogrvrt.xsd +4 -17
  24. pyogrio/gdal_data/osmconf.ini +3 -1
  25. pyogrio/gdal_data/pci_datum.txt +222 -155
  26. pyogrio/gdal_data/pci_ellips.txt +90 -38
  27. pyogrio/gdal_data/pdfcomposition.xsd +1 -17
  28. pyogrio/gdal_data/vcpkg.spdx.json +32 -27
  29. pyogrio/gdal_data/vcpkg_abi_info.txt +30 -29
  30. pyogrio/gdal_data/vdv452.xml +1 -17
  31. pyogrio/gdal_data/vdv452.xsd +1 -17
  32. pyogrio/geopandas.py +122 -66
  33. pyogrio/proj_data/ITRF2014 +1 -1
  34. pyogrio/proj_data/ITRF2020 +91 -0
  35. pyogrio/proj_data/proj-config-version.cmake +2 -2
  36. pyogrio/proj_data/proj-config.cmake +1 -1
  37. pyogrio/proj_data/proj-targets.cmake +3 -3
  38. pyogrio/proj_data/proj.db +0 -0
  39. pyogrio/proj_data/proj.ini +11 -3
  40. pyogrio/proj_data/proj4-targets.cmake +3 -3
  41. pyogrio/proj_data/projjson.schema.json +1 -1
  42. pyogrio/proj_data/usage +7 -2
  43. pyogrio/proj_data/vcpkg.spdx.json +27 -22
  44. pyogrio/proj_data/vcpkg_abi_info.txt +17 -16
  45. pyogrio/raw.py +46 -30
  46. pyogrio/tests/conftest.py +214 -12
  47. pyogrio/tests/fixtures/README.md +32 -13
  48. pyogrio/tests/fixtures/curve.gpkg +0 -0
  49. pyogrio/tests/fixtures/{test_multisurface.gpkg → curvepolygon.gpkg} +0 -0
  50. pyogrio/tests/fixtures/line_zm.gpkg +0 -0
  51. pyogrio/tests/fixtures/multisurface.gpkg +0 -0
  52. pyogrio/tests/test_arrow.py +181 -24
  53. pyogrio/tests/test_core.py +170 -76
  54. pyogrio/tests/test_geopandas_io.py +483 -135
  55. pyogrio/tests/test_path.py +39 -17
  56. pyogrio/tests/test_raw_io.py +170 -55
  57. pyogrio/tests/test_util.py +56 -0
  58. pyogrio/util.py +69 -32
  59. pyogrio-0.11.0.dist-info/METADATA +124 -0
  60. {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info}/RECORD +200 -214
  61. {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info}/WHEEL +1 -1
  62. {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info/licenses}/LICENSE +1 -1
  63. pyogrio.libs/{libgdal-6ff0914e.so.34.3.8.5 → libgdal-4bc0d15f.so.36.3.10.3} +0 -0
  64. pyogrio/_err.pxd +0 -4
  65. pyogrio/_err.pyx +0 -250
  66. pyogrio/_geometry.pxd +0 -4
  67. pyogrio/_geometry.pyx +0 -129
  68. pyogrio/_io.pxd +0 -0
  69. pyogrio/_io.pyx +0 -2742
  70. pyogrio/_ogr.pxd +0 -444
  71. pyogrio/_ogr.pyx +0 -346
  72. pyogrio/_vsi.pxd +0 -4
  73. pyogrio/_vsi.pyx +0 -140
  74. pyogrio/arrow_bridge.h +0 -115
  75. pyogrio/gdal_data/bag_template.xml +0 -201
  76. pyogrio/gdal_data/gmlasconf.xml +0 -169
  77. pyogrio/gdal_data/gmlasconf.xsd +0 -1066
  78. pyogrio/gdal_data/netcdf_config.xsd +0 -143
  79. pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
  80. pyogrio/tests/fixtures/test_datetime.geojson +0 -7
  81. pyogrio/tests/fixtures/test_datetime_tz.geojson +0 -8
  82. pyogrio/tests/fixtures/test_fgdb.gdb.zip +0 -0
  83. pyogrio/tests/fixtures/test_nested.geojson +0 -18
  84. pyogrio/tests/fixtures/test_ogr_types_list.geojson +0 -12
  85. pyogrio-0.9.0.dist-info/METADATA +0 -100
  86. {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info}/top_level.txt +0 -0
--- a/pyogrio/tests/test_geopandas_io.py
+++ b/pyogrio/tests/test_geopandas_io.py
@@ -1,14 +1,33 @@
 import contextlib
+import locale
+import warnings
 from datetime import datetime
 from io import BytesIO
-import locale
+from zipfile import ZipFile
 
 import numpy as np
-import pytest
 
-from pyogrio import list_layers, list_drivers, read_info, __gdal_version__
+from pyogrio import (
+    __gdal_version__,
+    list_drivers,
+    list_layers,
+    read_info,
+    set_gdal_config_options,
+    vsi_listtree,
+    vsi_unlink,
+)
+from pyogrio._compat import (
+    GDAL_GE_37,
+    GDAL_GE_311,
+    GDAL_GE_352,
+    HAS_ARROW_WRITE_API,
+    HAS_PYPROJ,
+    PANDAS_GE_15,
+    PANDAS_GE_30,
+    SHAPELY_GE_21,
+)
 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
-from pyogrio.geopandas import read_dataframe, write_dataframe, PANDAS_GE_20
+from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe
 from pyogrio.raw import (
     DRIVERS_NO_MIXED_DIMENSIONS,
     DRIVERS_NO_MIXED_SINGLE_MULTI,
@@ -16,27 +35,29 @@ from pyogrio.raw import (
 from pyogrio.tests.conftest import (
     ALL_EXTS,
     DRIVERS,
-    requires_pyarrow_api,
+    START_FID,
     requires_arrow_write_api,
     requires_gdal_geos,
+    requires_pyarrow_api,
+    requires_pyproj,
 )
-from pyogrio._compat import PANDAS_GE_15, HAS_ARROW_WRITE_API
 
-try:
-    import pandas as pd
-    from pandas.testing import (
-        assert_frame_equal,
-        assert_index_equal,
-        assert_series_equal,
-    )
+import pytest
 
+try:
     import geopandas as gp
+    import pandas as pd
     from geopandas.array import from_wkt
-    from geopandas.testing import assert_geodataframe_equal
 
     import shapely  # if geopandas is present, shapely is expected to be present
     from shapely.geometry import Point
 
+    from geopandas.testing import assert_geodataframe_equal
+    from pandas.testing import (
+        assert_index_equal,
+        assert_series_equal,
+    )
+
 except ImportError:
     pass
 
@@ -82,8 +103,20 @@ def spatialite_available(path):
     return False
 
 
-@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
-def test_read_csv_encoding(tmp_path, encoding):
+@pytest.mark.parametrize(
+    "encoding, arrow",
+    [
+        ("utf-8", False),
+        pytest.param("utf-8", True, marks=requires_pyarrow_api),
+        ("cp1252", False),
+        (None, False),
+    ],
+)
+def test_read_csv_encoding(tmp_path, encoding, arrow):
+    """ "Test reading CSV files with different encodings.
+
+    Arrow only supports utf-8 encoding.
+    """
     # Write csv test file. Depending on the os this will be written in a different
     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
     csv_path = tmp_path / "test.csv"
@@ -94,7 +127,7 @@ def test_read_csv_encoding(tmp_path, encoding):
     # Read csv. The data should be read with the same default encoding as the csv file
     # was written in, but should have been converted to utf-8 in the dataframe returned.
     # Hence, the asserts below, with strings in utf-8, be OK.
-    df = read_dataframe(csv_path, encoding=encoding)
+    df = read_dataframe(csv_path, encoding=encoding, use_arrow=arrow)
 
     assert len(df) == 1
     assert df.columns.tolist() == ["näme", "city"]
@@ -106,25 +139,36 @@
     locale.getpreferredencoding().upper() == "UTF-8",
     reason="test requires non-UTF-8 default platform",
 )
-def test_read_csv_platform_encoding(tmp_path):
-    """verify that read defaults to platform encoding; only works on Windows (CP1252)"""
+def test_read_csv_platform_encoding(tmp_path, use_arrow):
+    """Verify that read defaults to platform encoding; only works on Windows (CP1252).
+
+    When use_arrow=True, reading an non-UTF8 fails.
+    """
     csv_path = tmp_path / "test.csv"
     with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
         csv.write("näme,city\n")
         csv.write("Wilhelm Röntgen,Zürich\n")
 
-    df = read_dataframe(csv_path)
+    if use_arrow:
+        with pytest.raises(
+            DataSourceError,
+            match="; please use_arrow=False",
+        ):
+            df = read_dataframe(csv_path, use_arrow=use_arrow)
+    else:
+        df = read_dataframe(csv_path, use_arrow=use_arrow)
 
-    assert len(df) == 1
-    assert df.columns.tolist() == ["näme", "city"]
-    assert df.city.tolist() == ["Zürich"]
-    assert df.näme.tolist() == ["Wilhelm Röntgen"]
+        assert len(df) == 1
+        assert df.columns.tolist() == ["näme", "city"]
+        assert df.city.tolist() == ["Zürich"]
+        assert df.näme.tolist() == ["Wilhelm Röntgen"]
 
 
 def test_read_dataframe(naturalearth_lowres_all_ext):
     df = read_dataframe(naturalearth_lowres_all_ext)
 
-    assert df.crs == "EPSG:4326"
+    if HAS_PYPROJ:
+        assert df.crs == "EPSG:4326"
     assert len(df) == 177
     assert df.columns.tolist() == [
         "pop_est",
@@ -142,14 +186,13 @@ def test_read_dataframe_vsi(naturalearth_lowres_vsi, use_arrow):
 
 
 @pytest.mark.parametrize(
-    "columns, fid_as_index, exp_len", [(None, False, 2), ([], True, 2), ([], False, 0)]
+    "columns, fid_as_index, exp_len", [(None, False, 3), ([], True, 3), ([], False, 0)]
 )
 def test_read_layer_without_geometry(
-    test_fgdb_vsi, columns, fid_as_index, use_arrow, exp_len
+    no_geometry_file, columns, fid_as_index, use_arrow, exp_len
 ):
     result = read_dataframe(
-        test_fgdb_vsi,
-        layer="basetable",
+        no_geometry_file,
         columns=columns,
         fid_as_index=fid_as_index,
         use_arrow=use_arrow,
@@ -195,38 +238,85 @@ def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres, use_arrow):
     )
 
 
-def test_read_force_2d(test_fgdb_vsi, use_arrow):
-    with pytest.warns(
-        UserWarning, match=r"Measured \(M\) geometry types are not supported"
-    ):
-        df = read_dataframe(test_fgdb_vsi, layer="test_lines", max_features=1)
-        assert df.iloc[0].geometry.has_z
+def test_read_force_2d(tmp_path, use_arrow):
+    filename = tmp_path / "test.gpkg"
+
+    # create a GPKG with 3D point values
+    expected = gp.GeoDataFrame(
+        geometry=[Point(0, 0, 0), Point(1, 1, 0)], crs="EPSG:4326"
+    )
+    write_dataframe(expected, filename)
+
+    df = read_dataframe(filename)
+    assert df.iloc[0].geometry.has_z
+
+    df = read_dataframe(
+        filename,
+        force_2d=True,
+        max_features=1,
+        use_arrow=use_arrow,
+    )
+    assert not df.iloc[0].geometry.has_z
+
+
+@pytest.mark.skipif(
+    not GDAL_GE_352,
+    reason="gdal >= 3.5.2 needed to use OGR_GEOJSON_MAX_OBJ_SIZE with a float value",
+)
+def test_read_geojson_error(naturalearth_lowres_geojson, use_arrow):
+    try:
+        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01})
+        with pytest.raises(
+            DataSourceError,
+            match="Failed to read GeoJSON data; .* GeoJSON object too complex",
+        ):
+            read_dataframe(naturalearth_lowres_geojson, use_arrow=use_arrow)
+    finally:
+        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": None})
 
-    df = read_dataframe(
-        test_fgdb_vsi,
-        layer="test_lines",
-        force_2d=True,
-        max_features=1,
-        use_arrow=use_arrow,
-    )
-    assert not df.iloc[0].geometry.has_z
 
+def test_read_layer(tmp_path, use_arrow):
+    filename = tmp_path / "test.gpkg"
 
-@pytest.mark.filterwarnings("ignore: Measured")
-@pytest.mark.filterwarnings("ignore: More than one layer found in")
-def test_read_layer(test_fgdb_vsi, use_arrow):
-    layers = list_layers(test_fgdb_vsi)
-    kwargs = {"use_arrow": use_arrow, "read_geometry": False, "max_features": 1}
+    # create a multilayer GPKG
+    expected1 = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
+    if use_arrow:
+        # TODO this needs to be fixed on the geopandas side (to ensure the
+        # GeoDataFrame() constructor does this), when use_arrow we already
+        # get columns Index with string dtype
+        expected1.columns = expected1.columns.astype("str")
+    write_dataframe(
+        expected1,
+        filename,
+        layer="layer1",
+    )
 
-    # The first layer is read by default (NOTE: first layer has no features)
-    df = read_dataframe(test_fgdb_vsi, **kwargs)
-    df2 = read_dataframe(test_fgdb_vsi, layer=layers[0][0], **kwargs)
-    assert_frame_equal(df, df2)
+    expected2 = gp.GeoDataFrame(geometry=[Point(1, 1)], crs="EPSG:4326")
+    if use_arrow:
+        expected2.columns = expected2.columns.astype("str")
+    write_dataframe(expected2, filename, layer="layer2", append=True)
 
-    # Reading a specific layer should return that layer.
+    assert np.array_equal(
+        list_layers(filename), [["layer1", "Point"], ["layer2", "Point"]]
+    )
+
+    kwargs = {"use_arrow": use_arrow, "max_features": 1}
+
+    # The first layer is read by default, which will warn when there are multiple
+    # layers
+    with pytest.warns(UserWarning, match="More than one layer found"):
+        df = read_dataframe(filename, **kwargs)
+
+    assert_geodataframe_equal(df, expected1)
+
+    # Reading a specific layer by name should return that layer.
     # Detected here by a known column.
-    df = read_dataframe(test_fgdb_vsi, layer="test_lines", **kwargs)
-    assert "RIVER_MILE" in df.columns
+    df = read_dataframe(filename, layer="layer2", **kwargs)
+    assert_geodataframe_equal(df, expected2)
+
+    # Reading a specific layer by index should return that layer
+    df = read_dataframe(filename, layer=1, **kwargs)
+    assert_geodataframe_equal(df, expected2)
 
 
 def test_read_layer_invalid(naturalearth_lowres_all_ext, use_arrow):
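
The new `test_read_geojson_error` above leans on `set_gdal_config_options`, now imported from the top-level `pyogrio` package. A minimal sketch of the set-then-restore pattern the test uses; the option name and the `None`-to-clear behavior come from the hunk, while the context-manager wrapper itself is illustrative, not pyogrio API:

```python
# Hedged sketch: a context manager around pyogrio.set_gdal_config_options,
# mirroring the try/finally pattern in test_read_geojson_error above.
from contextlib import contextmanager

from pyogrio import set_gdal_config_options


@contextmanager
def gdal_config(options: dict):
    set_gdal_config_options(options)
    try:
        yield
    finally:
        # Setting a value to None clears it again, as the test's finally block does.
        set_gdal_config_options({name: None for name in options})


# Usage: force GDAL to reject "too complex" GeoJSON while the block runs, e.g.
# with gdal_config({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01}):
#     read_dataframe("features.geojson")
```
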
@@ -234,22 +324,19 @@ def test_read_layer_invalid(naturalearth_lowres_all_ext, use_arrow):
         read_dataframe(naturalearth_lowres_all_ext, layer="wrong", use_arrow=use_arrow)
 
 
-@pytest.mark.filterwarnings("ignore: Measured")
-def test_read_datetime(test_fgdb_vsi, use_arrow):
-    df = read_dataframe(
-        test_fgdb_vsi, layer="test_lines", use_arrow=use_arrow, max_features=1
-    )
+def test_read_datetime(datetime_file, use_arrow):
+    df = read_dataframe(datetime_file, use_arrow=use_arrow)
     if PANDAS_GE_20:
         # starting with pandas 2.0, it preserves the passed datetime resolution
-        assert df.SURVEY_DAT.dtype.name == "datetime64[ms]"
+        assert df.col.dtype.name == "datetime64[ms]"
     else:
-        assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
+        assert df.col.dtype.name == "datetime64[ns]"
 
 
 @pytest.mark.filterwarnings("ignore: Non-conformant content for record 1 in column ")
 @pytest.mark.requires_arrow_write_api
-def test_read_datetime_tz(test_datetime_tz, tmp_path, use_arrow):
-    df = read_dataframe(test_datetime_tz)
+def test_read_datetime_tz(datetime_tz_file, tmp_path, use_arrow):
+    df = read_dataframe(datetime_tz_file)
     # Make the index non-consecutive to test this case as well. Added for issue
     # https://github.com/geopandas/pyogrio/issues/324
     df = df.set_index(np.array([0, 2]))
@@ -319,14 +406,17 @@ def test_read_write_datetime_tz_with_nulls(tmp_path, use_arrow):
     assert_geodataframe_equal(df, result)
 
 
-def test_read_null_values(test_fgdb_vsi, use_arrow):
-    df = read_dataframe(
-        test_fgdb_vsi, layer="basetable_2", use_arrow=use_arrow, read_geometry=False
-    )
+def test_read_null_values(tmp_path, use_arrow):
+    filename = tmp_path / "test_null_values_no_geometry.gpkg"
+
+    # create a GPKG with no geometries and only null values
+    expected = pd.DataFrame({"col": [None, None]})
+    write_dataframe(expected, filename)
+
+    df = read_dataframe(filename, use_arrow=use_arrow, read_geometry=False)
 
     # make sure that Null values are preserved
-    assert df.SEGMENT_NAME.isnull().max()
-    assert df.loc[df.SEGMENT_NAME.isnull()].SEGMENT_NAME.iloc[0] is None
+    assert df["col"].isna().all()
 
 
 def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow):
@@ -344,12 +434,9 @@ def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow):
         fid_as_index=True,
         **kwargs,
     )
-    if naturalearth_lowres_all_ext.suffix in [".gpkg"]:
-        # File format where fid starts at 1
-        assert_index_equal(df.index, pd.Index([3, 4], name="fid"))
-    else:
-        # File format where fid starts at 0
-        assert_index_equal(df.index, pd.Index([2, 3], name="fid"))
+    fids_expected = pd.Index([2, 3], name="fid")
+    fids_expected += START_FID[naturalearth_lowres_all_ext.suffix]
+    assert_index_equal(df.index, fids_expected)
 
 
 def test_read_fid_as_index_only(naturalearth_lowres, use_arrow):
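
The `START_FID` lookup imported from `conftest` replaces the hard-coded `.gpkg`-vs-else branch deleted here. Its exact contents live in `pyogrio/tests/conftest.py` (file 46 above); a minimal sketch consistent with the deleted logic, with hypothetical entries for the other extensions:

```python
import pandas as pd

# Hypothetical reconstruction of conftest.START_FID: GPKG FIDs start at 1,
# other formats at 0. Only the .gpkg/else split is confirmed by the deleted
# branch above; the non-GPKG keys are illustrative.
START_FID = {".gpkg": 1, ".shp": 0, ".geojson": 0, ".fgb": 0}

# The assertion then becomes driver-independent:
fids_expected = pd.Index([2, 3], name="fid") + START_FID[".gpkg"]
assert fids_expected.tolist() == [3, 4]
```
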
@@ -406,10 +493,17 @@ def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
     if use_arrow and naturalearth_lowres_all_ext.suffix == ".gpkg":
         # https://github.com/OSGeo/gdal/issues/8492
         request.node.add_marker(pytest.mark.xfail(reason="GDAL doesn't error for GPGK"))
-    with pytest.raises(ValueError, match="Invalid SQL"):
-        read_dataframe(
-            naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
-        )
+
+    if naturalearth_lowres_all_ext.suffix == ".gpkg" and __gdal_version__ >= (3, 11, 0):
+        with pytest.raises(DataLayerError, match="no such column"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+            )
+    else:
+        with pytest.raises(ValueError, match="Invalid SQL"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+            )
 
 
 def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
@@ -605,17 +699,22 @@ def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext):
     assert len(df) == 1
 
 
-def test_read_fids_force_2d(test_fgdb_vsi):
-    with pytest.warns(
-        UserWarning, match=r"Measured \(M\) geometry types are not supported"
-    ):
-        df = read_dataframe(test_fgdb_vsi, layer="test_lines", fids=[22])
-        assert len(df) == 1
-        assert df.iloc[0].geometry.has_z
+def test_read_fids_force_2d(tmp_path):
+    filename = tmp_path / "test.gpkg"
 
-    df = read_dataframe(test_fgdb_vsi, layer="test_lines", force_2d=True, fids=[22])
-    assert len(df) == 1
-    assert not df.iloc[0].geometry.has_z
+    # create a GPKG with 3D point values
+    expected = gp.GeoDataFrame(
+        geometry=[Point(0, 0, 0), Point(1, 1, 0)], crs="EPSG:4326"
+    )
+    write_dataframe(expected, filename)
+
+    df = read_dataframe(filename, fids=[1])
+    assert_geodataframe_equal(df, expected.iloc[:1])
+
+    df = read_dataframe(filename, force_2d=True, fids=[1])
+    assert np.array_equal(
+        df.geometry.values, shapely.force_2d(expected.iloc[:1].geometry.values)
+    )
 
 
 @pytest.mark.parametrize("skip_features", [10, 200])
@@ -638,6 +737,13 @@ def test_read_skip_features(naturalearth_lowres_all_ext, use_arrow, skip_feature
     # In .geojsonl the vertices are reordered, so normalize
     is_jsons = ext == ".geojsonl"
 
+    if skip_features == 200 and not use_arrow:
+        # result is an empty dataframe, so no proper dtype inference happens
+        # for the numpy object dtype arrays
+        df[["continent", "name", "iso_a3"]] = df[
+            ["continent", "name", "iso_a3"]
+        ].astype("str")
+
     assert_geodataframe_equal(
         df,
         expected,
@@ -769,7 +875,7 @@ def test_read_sql_invalid(naturalearth_lowres_all_ext, use_arrow):
     )
 
     with pytest.raises(
-        ValueError, match="'sql' paramater cannot be combined with 'layer'"
+        ValueError, match="'sql' parameter cannot be combined with 'layer'"
     ):
         read_dataframe(
             naturalearth_lowres_all_ext,
@@ -906,9 +1012,20 @@ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
     assert df.iloc[0].geometry.area > area_canada
 
 
-@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
-def test_write_csv_encoding(tmp_path, encoding):
-    """Test if write_dataframe uses the default encoding correctly."""
+@pytest.mark.parametrize(
+    "encoding, arrow",
+    [
+        ("utf-8", False),
+        pytest.param("utf-8", True, marks=requires_arrow_write_api),
+        ("cp1252", False),
+        (None, False),
+    ],
+)
+def test_write_csv_encoding(tmp_path, encoding, arrow):
+    """Test if write_dataframe uses the default encoding correctly.
+
+    Arrow only supports utf-8 encoding.
+    """
     # Write csv test file. Depending on the os this will be written in a different
     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
     csv_path = tmp_path / "test.csv"
@@ -921,12 +1038,12 @@ def test_write_csv_encoding(tmp_path, encoding):
     # same encoding as above.
     df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
     csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
-    write_dataframe(df, csv_pyogrio_path, encoding=encoding)
+    write_dataframe(df, csv_pyogrio_path, encoding=encoding, use_arrow=arrow)
 
     # Check if the text files written both ways can be read again and give same result.
-    with open(csv_path, "r", encoding=encoding) as csv:
+    with open(csv_path, encoding=encoding) as csv:
         csv_str = csv.read()
-    with open(csv_pyogrio_path, "r", encoding=encoding) as csv_pyogrio:
+    with open(csv_pyogrio_path, encoding=encoding) as csv_pyogrio:
         csv_pyogrio_str = csv_pyogrio.read()
     assert csv_str == csv_pyogrio_str
 
@@ -939,6 +1056,48 @@ def test_write_csv_encoding(tmp_path, encoding):
     assert csv_bytes == csv_pyogrio_bytes
 
 
+@pytest.mark.parametrize(
+    "ext, fid_column, fid_param_value",
+    [
+        (".gpkg", "fid", None),
+        (".gpkg", "FID", None),
+        (".sqlite", "ogc_fid", None),
+        (".gpkg", "fid_custom", "fid_custom"),
+        (".gpkg", "FID_custom", "fid_custom"),
+        (".sqlite", "ogc_fid_custom", "ogc_fid_custom"),
+    ],
+)
+@pytest.mark.requires_arrow_write_api
+def test_write_custom_fids(tmp_path, ext, fid_column, fid_param_value, use_arrow):
+    """Test to specify FIDs to save when writing to a file.
+
+    Saving custom FIDs is only supported for formats that actually store the FID, like
+    e.g. GPKG and SQLite. The fid_column name check is case-insensitive.
+
+    Typically, GDAL supports using a custom FID column for these file formats via a
+    `FID` layer creation option, which is also tested here. If `fid_param_value` is
+    specified (not None), an `fid` parameter is passed to `write_dataframe`, causing
+    GDAL to use the column name specified for the FID.
+    """
+    input_gdf = gp.GeoDataFrame(
+        {fid_column: [5]}, geometry=[shapely.Point(0, 0)], crs="epsg:4326"
+    )
+    kwargs = {}
+    if fid_param_value is not None:
+        kwargs["fid"] = fid_param_value
+    path = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, path, use_arrow=use_arrow, **kwargs)
+
+    assert path.exists()
+    output_gdf = read_dataframe(path, fid_as_index=True, use_arrow=use_arrow)
+    output_gdf = output_gdf.reset_index()
+
+    # pyogrio always sets "fid" as index name with `fid_as_index`
+    expected_gdf = input_gdf.rename(columns={fid_column: "fid"})
+    assert_geodataframe_equal(output_gdf, expected_gdf)
+
+
 @pytest.mark.parametrize("ext", ALL_EXTS)
 @pytest.mark.requires_arrow_write_api
 def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
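
Outside the test suite, the new `fid` keyword to `write_dataframe` looks roughly like this; a hedged sketch, where the file and column names are illustrative and GPKG/SQLite are the formats the test above covers:

```python
# Sketch of the custom-FID round trip exercised by test_write_custom_fids.
import geopandas as gp
import shapely
from pyogrio import read_dataframe, write_dataframe

gdf = gp.GeoDataFrame(
    {"fid_custom": [5]}, geometry=[shapely.Point(0, 0)], crs="epsg:4326"
)
# fid= names the column holding the feature IDs GDAL should store (GPKG/SQLite).
write_dataframe(gdf, "data.gpkg", fid="fid_custom")

# Reading back with fid_as_index=True restores them under the "fid" index name.
roundtrip = read_dataframe("data.gpkg", fid_as_index=True)
assert roundtrip.index.tolist() == [5]
```
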
@@ -960,7 +1119,7 @@ def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
     if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
         assert list(geometry_types) == ["MultiPolygon"]
     else:
-        assert set(geometry_types) == set(["MultiPolygon", "Polygon"])
+        assert set(geometry_types) == {"MultiPolygon", "Polygon"}
 
     # Coordinates are not precisely equal when written to JSON
     # dtypes do not necessarily round-trip precisely through JSON
@@ -1050,18 +1209,79 @@ def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):
 
 
 @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
+@pytest.mark.parametrize(
+    "columns, dtype",
+    [
+        ([], None),
+        (["col_int"], np.int64),
+        (["col_float"], np.float64),
+        (["col_object"], object),
+    ],
+)
 @pytest.mark.requires_arrow_write_api
-def test_write_empty_dataframe(tmp_path, ext, use_arrow):
-    expected = gp.GeoDataFrame(geometry=[], crs=4326)
+def test_write_empty_dataframe(tmp_path, ext, columns, dtype, use_arrow):
+    """Test writing dataframe with no rows.
 
+    With use_arrow, object type columns with no rows are converted to null type columns
+    by pyarrow, but null columns are not supported by GDAL. Added to test fix for #513.
+    """
+    expected = gp.GeoDataFrame(geometry=[], columns=columns, dtype=dtype, crs=4326)
     filename = tmp_path / f"test{ext}"
     write_dataframe(expected, filename, use_arrow=use_arrow)
 
     assert filename.exists()
+    df = read_dataframe(filename, use_arrow=use_arrow)
+
+    # Check result
+    # For older pandas versions, the index is created as Object dtype but read as
+    # RangeIndex, so don't check the index dtype in that case.
+    check_index_type = True if PANDAS_GE_20 else False
+    # with pandas 3+ and reading through arrow, we preserve the string dtype
+    # (no proper dtype inference happens for the empty numpy object dtype arrays)
+    if use_arrow and dtype is object:
+        expected["col_object"] = expected["col_object"].astype("str")
+    assert_geodataframe_equal(df, expected, check_index_type=check_index_type)
+
+
+def test_write_empty_geometry(tmp_path):
+    expected = gp.GeoDataFrame({"x": [0]}, geometry=from_wkt(["POINT EMPTY"]), crs=4326)
+    filename = tmp_path / "test.gpkg"
+
+    # Check that no warning is raised with GeoSeries.notna()
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", UserWarning)
+        if not HAS_PYPROJ:
+            warnings.filterwarnings("ignore", message="'crs' was not provided.")
+        write_dataframe(expected, filename)
+    assert filename.exists()
+
+    # Xref GH-436: round-tripping possible with GPKG but not others
     df = read_dataframe(filename)
     assert_geodataframe_equal(df, expected)
 
 
+@pytest.mark.requires_arrow_write_api
+def test_write_None_string_column(tmp_path, use_arrow):
+    """Test pandas object columns with all None values.
+
+    With use_arrow, such columns are converted to null type columns by pyarrow, but null
+    columns are not supported by GDAL. Added to test fix for #513.
+    """
+    gdf = gp.GeoDataFrame({"object_col": [None]}, geometry=[Point(0, 0)], crs=4326)
+    filename = tmp_path / "test.gpkg"
+
+    write_dataframe(gdf, filename, use_arrow=use_arrow)
+    assert filename.exists()
+
+    result_gdf = read_dataframe(filename, use_arrow=use_arrow)
+    if PANDAS_GE_30 and use_arrow:
+        assert result_gdf.object_col.dtype == "str"
+        gdf["object_col"] = gdf["object_col"].astype("str")
+    else:
+        assert result_gdf.object_col.dtype == object
+    assert_geodataframe_equal(result_gdf, gdf)
+
+
 @pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
 @pytest.mark.requires_arrow_write_api
 def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
@@ -1161,7 +1381,7 @@ def test_write_dataframe_gdal_options(
         df,
         outfilename2,
         use_arrow=use_arrow,
-        layer_options=dict(spatial_index=spatial_index),
+        layer_options={"spatial_index": spatial_index},
     )
     assert outfilename2.exists() is True
     index_filename2 = tmp_path / "test2.qix"
@@ -1207,7 +1427,7 @@ def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres, use
         df,
         test_no_contents_filename2,
         use_arrow=use_arrow,
-        dataset_options=dict(add_gpkg_ogr_contents=False),
+        dataset_options={"add_gpkg_ogr_contents": False},
     )
     assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename2)
 
@@ -1320,7 +1540,8 @@ def test_write_dataframe_promote_to_multi_layer_geom_type(
             ".shp",
             None,
             "Point",
-            "Could not add feature to layer at index|Error while writing batch to OGR layer",
+            "Could not add feature to layer at index|Error while writing batch to OGR "
+            "layer",
         ),
     ],
 )
@@ -1443,6 +1664,7 @@ def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext, use_arr
     "ignore: You will likely lose important projection information"
 )
 @pytest.mark.requires_arrow_write_api
+@requires_pyproj
 def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
     df = read_dataframe(naturalearth_lowres_all_ext)
     # project Belgium to a custom Albers Equal Area projection
@@ -1465,6 +1687,30 @@ def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
     assert df.crs.equals(expected.crs)
 
 
+@pytest.mark.parametrize("ext", [".gpkg.zip", ".shp.zip", ".shz"])
+@pytest.mark.requires_arrow_write_api
+def test_write_read_zipped_ext(tmp_path, naturalearth_lowres, ext, use_arrow):
+    """Run a basic read and write test on some extra (zipped) extensions."""
+    if ext == ".gpkg.zip" and not GDAL_GE_37:
+        pytest.skip(".gpkg.zip support requires GDAL >= 3.7")
+
+    input_gdf = read_dataframe(naturalearth_lowres)
+    output_path = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
+
+    assert output_path.exists()
+    result_gdf = read_dataframe(output_path)
+
+    geometry_types = result_gdf.geometry.type.unique()
+    if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
+        assert list(geometry_types) == ["MultiPolygon"]
+    else:
+        assert set(geometry_types) == {"MultiPolygon", "Polygon"}
+
+    assert_geodataframe_equal(result_gdf, input_gdf, check_index_type=False)
+
+
 def test_write_read_mixed_column_values(tmp_path):
     # use_arrow=True is tested separately below
     mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
@@ -1476,11 +1722,13 @@ def test_write_read_mixed_column_values(tmp_path):
     write_dataframe(test_gdf, output_path)
     output_gdf = read_dataframe(output_path)
     assert len(test_gdf) == len(output_gdf)
-    for idx, value in enumerate(mixed_values):
-        if value in (None, np.nan):
-            assert output_gdf["mixed"][idx] is None
-        else:
-            assert output_gdf["mixed"][idx] == str(value)
+    # mixed values as object dtype are currently written as strings
+    # (but preserving nulls)
+    expected = pd.Series(
+        [str(value) if value not in (None, np.nan) else None for value in mixed_values],
+        name="mixed",
+    )
+    assert_series_equal(output_gdf["mixed"], expected)
 
 
 @requires_arrow_write_api
@@ -1513,8 +1761,24 @@ def test_write_read_null(tmp_path, use_arrow):
     assert pd.isna(result_gdf["float64"][1])
     assert pd.isna(result_gdf["float64"][2])
     assert result_gdf["object_str"][0] == "test"
-    assert result_gdf["object_str"][1] is None
-    assert result_gdf["object_str"][2] is None
+    assert pd.isna(result_gdf["object_str"][1])
+    assert pd.isna(result_gdf["object_str"][2])
+
+
+@pytest.mark.requires_arrow_write_api
+def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow):
+    path, _ = naturalearth_lowres_vsi
+    mem_path = f"/vsimem/{path.name}"
+
+    input = read_dataframe(path, use_arrow=use_arrow)
+    assert len(input) == 177
+
+    try:
+        write_dataframe(input, mem_path, use_arrow=use_arrow)
+        result = read_dataframe(mem_path, use_arrow=use_arrow)
+        assert len(result) == 177
+    finally:
+        vsi_unlink(mem_path)
 
 
 @pytest.mark.parametrize(
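
`vsi_listtree` and `vsi_unlink`, imported at the top of this file, are new public helpers for GDAL's in-memory `/vsimem/` filesystem. A minimal sketch of the round trip the test above performs, with an illustrative path (the `pattern=` keyword of `vsi_listtree` matches its use elsewhere in this diff):

```python
# Sketch of a /vsimem/ round trip using the new helpers.
import geopandas as gp
from shapely.geometry import Point

from pyogrio import read_dataframe, vsi_listtree, vsi_unlink, write_dataframe

gdf = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
try:
    write_dataframe(gdf, "/vsimem/example.gpkg")
    print(vsi_listtree("/vsimem/", pattern="*.gpkg"))  # lists the in-memory file
    assert len(read_dataframe("/vsimem/example.gpkg")) == 1
finally:
    vsi_unlink("/vsimem/example.gpkg")  # /vsimem/ files persist until unlinked
```
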
@@ -1529,7 +1793,7 @@ def test_write_read_null(tmp_path, use_arrow):
             ["2.5D MultiLineString", "MultiLineString Z"],
         ),
         (
-            "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))",
+            "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))",  # noqa: E501
             ["2.5D MultiPolygon", "MultiPolygon Z"],
         ),
         (
@@ -1572,7 +1836,7 @@ def test_write_geometry_z_types(tmp_path, wkt, geom_types, use_arrow):
             "MultiPolygon Z",
             False,
             [
-                "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))"
+                "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))"  # noqa: E501
             ],
         ),
         (
@@ -1642,54 +1906,84 @@ def test_write_geometry_z_types_auto(
 
 
 @pytest.mark.parametrize(
-    "on_invalid, message",
+    "on_invalid, message, expected_wkt",
     [
         (
             "warn",
             "Invalid WKB: geometry is returned as None. IllegalArgumentException: "
-            "Invalid number of points in LinearRing found 2 - must be 0 or >=",
+            "Points of LinearRing do not form a closed linestring",
+            None,
         ),
-        ("raise", "Invalid number of points in LinearRing found 2 - must be 0 or >="),
-        ("ignore", None),
+        ("raise", "Points of LinearRing do not form a closed linestring", None),
+        ("ignore", None, None),
+        ("fix", None, "POLYGON ((0 0, 0 1, 0 0))"),
     ],
 )
-def test_read_invalid_shp(data_dir, use_arrow, on_invalid, message):
+@pytest.mark.filterwarnings("ignore:Non closed ring detected:RuntimeWarning")
+def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message, expected_wkt):
+    if on_invalid == "fix" and not SHAPELY_GE_21:
+        pytest.skip("on_invalid=fix not available for Shapely < 2.1")
+
     if on_invalid == "raise":
         handler = pytest.raises(shapely.errors.GEOSException, match=message)
     elif on_invalid == "warn":
         handler = pytest.warns(match=message)
-    elif on_invalid == "ignore":
+    elif on_invalid in ("fix", "ignore"):
         handler = contextlib.nullcontext()
     else:
        raise ValueError(f"unknown value for on_invalid: {on_invalid}")
 
+    # create a GeoJSON file with an invalid exterior ring
+    invalid_geojson = """{
+        "type": "FeatureCollection",
+        "features": [
+            {
+                "type": "Feature",
+                "properties": {},
+                "geometry": {
+                    "type": "Polygon",
+                    "coordinates": [ [ [0, 0], [0, 1] ] ]
+                }
+            }
+        ]
+    }"""
+
+    filename = tmp_path / "test.geojson"
+    with open(filename, "w") as f:
+        _ = f.write(invalid_geojson)
+
     with handler:
         df = read_dataframe(
-            data_dir / "poly_not_enough_points.shp.zip",
+            filename,
             use_arrow=use_arrow,
             on_invalid=on_invalid,
         )
-        df.geometry.isnull().all()
+        if expected_wkt is None:
+            assert df.geometry.iloc[0] is None
+        else:
+            assert df.geometry.iloc[0].wkt == expected_wkt
 
 
-def test_read_multisurface(data_dir, use_arrow):
+def test_read_multisurface(multisurface_file, use_arrow):
     if use_arrow:
+        # TODO: revisit once https://github.com/geopandas/pyogrio/issues/478
+        # is resolved.
+        pytest.skip("Shapely + GEOS 3.13 crashes in from_wkb for this case")
+
         with pytest.raises(shapely.errors.GEOSException):
             # TODO(Arrow)
             # shapely fails parsing the WKB
-            read_dataframe(data_dir / "test_multisurface.gpkg", use_arrow=True)
+            read_dataframe(multisurface_file, use_arrow=True)
     else:
-        df = read_dataframe(data_dir / "test_multisurface.gpkg")
+        df = read_dataframe(multisurface_file)
 
     # MultiSurface should be converted to MultiPolygon
     assert df.geometry.type.tolist() == ["MultiPolygon"]
 
 
-def test_read_dataset_kwargs(data_dir, use_arrow):
-    filename = data_dir / "test_nested.geojson"
-
+def test_read_dataset_kwargs(nested_geojson_file, use_arrow):
     # by default, nested data are not flattened
-    df = read_dataframe(filename, use_arrow=use_arrow)
+    df = read_dataframe(nested_geojson_file, use_arrow=use_arrow)
 
     expected = gp.GeoDataFrame(
         {
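
The renamed `test_read_invalid_poly_ring` adds a fourth `on_invalid` mode, `"fix"`, next to the existing `"raise"`, `"warn"`, and `"ignore"` modes (`"fix"` needs Shapely >= 2.1). In sketch form, against a file like the inline GeoJSON above (the file name is illustrative):

```python
# Hedged sketch of the four on_invalid modes for read_dataframe.
from pyogrio import read_dataframe

df = read_dataframe("unclosed_ring.geojson", on_invalid="fix")     # close the ring if possible
df = read_dataframe("unclosed_ring.geojson", on_invalid="ignore")  # geometry becomes None, silently
df = read_dataframe("unclosed_ring.geojson", on_invalid="warn")    # None, plus a warning
df = read_dataframe("unclosed_ring.geojson", on_invalid="raise")   # raises shapely GEOSException
```
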
@@ -1699,10 +1993,16 @@ def test_read_dataset_kwargs(data_dir, use_arrow):
         geometry=[shapely.Point(0, 0)],
         crs="EPSG:4326",
     )
+    if GDAL_GE_311 and use_arrow:
+        # GDAL 3.11 started to use json extension type, which is not yet handled
+        # correctly in the arrow->pandas conversion (using object instead of str dtype)
+        expected["intermediate_level"] = expected["intermediate_level"].astype(object)
 
     assert_geodataframe_equal(df, expected)
 
-    df = read_dataframe(filename, use_arrow=use_arrow, FLATTEN_NESTED_ATTRIBUTES="YES")
+    df = read_dataframe(
+        nested_geojson_file, use_arrow=use_arrow, FLATTEN_NESTED_ATTRIBUTES="YES"
+    )
 
     expected = gp.GeoDataFrame(
         {
@@ -1742,7 +2042,7 @@ def test_write_nullable_dtypes(tmp_path, use_arrow):
     expected["col2"] = expected["col2"].astype("float64")
     expected["col3"] = expected["col3"].astype("float32")
     expected["col4"] = expected["col4"].astype("float64")
-    expected["col5"] = expected["col5"].astype(object)
+    expected["col5"] = expected["col5"].astype("str")
     expected.loc[1, "col5"] = None  # pandas converts to pd.NA on line above
     assert_geodataframe_equal(output_gdf, expected)
 
@@ -1904,6 +2204,9 @@ def test_write_memory(naturalearth_lowres, driver):
         check_dtype=not is_json,
     )
 
+    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+
 
 def test_write_memory_driver_required(naturalearth_lowres):
     df = read_dataframe(naturalearth_lowres)
@@ -1916,6 +2219,9 @@ def test_write_memory_driver_required(naturalearth_lowres):
     ):
         write_dataframe(df.head(1), buffer, driver=None, layer="test")
 
+    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+
 
 @pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
 def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
@@ -1931,6 +2237,9 @@ def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
     ):
         write_dataframe(df, buffer, driver=driver, layer="test")
 
+    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+
 
 @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
 def test_write_memory_append_unsupported(naturalearth_lowres, driver):
@@ -1943,6 +2252,9 @@ def test_write_memory_append_unsupported(naturalearth_lowres, driver):
     ):
         write_dataframe(df.head(1), buffer, driver=driver, layer="test", append=True)
 
+    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+
 
 def test_write_memory_existing_unsupported(naturalearth_lowres):
     df = read_dataframe(naturalearth_lowres)
@@ -1954,6 +2266,33 @@ def test_write_memory_existing_unsupported(naturalearth_lowres):
     ):
         write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test")
 
+    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+
+
+def test_write_open_file_handle(tmp_path, naturalearth_lowres):
+    """Verify that writing to an open file handle is not currently supported"""
+
+    df = read_dataframe(naturalearth_lowres)
+
+    # verify it fails for regular file handle
+    with pytest.raises(
+        NotImplementedError, match="writing to an open file handle is not yet supported"
+    ):
+        with open(tmp_path / "test.geojson", "wb") as f:
+            write_dataframe(df.head(1), f)
+
+    # verify it fails for ZipFile
+    with pytest.raises(
+        NotImplementedError, match="writing to an open file handle is not yet supported"
+    ):
+        with ZipFile(tmp_path / "test.geojson.zip", "w") as z:
+            with z.open("test.geojson", "w") as f:
+                write_dataframe(df.head(1), f)
+
+    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+
 
 @pytest.mark.parametrize("ext", ["gpkg", "geojson"])
 def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
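
While open file handles are rejected, the memory tests above show the supported alternative: pass a `BytesIO` object plus an explicit driver. A brief sketch; the GPKG choice is an assumption based on the drivers the append test parametrizes:

```python
# Hedged sketch of the supported in-memory write path.
from io import BytesIO

import geopandas as gp
from shapely.geometry import Point

from pyogrio import write_dataframe

gdf = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
buffer = BytesIO()
write_dataframe(gdf, buffer, driver="GPKG", layer="test")  # driver is required
raw_bytes = buffer.getvalue()  # the serialized dataset, e.g. for an upload
```
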
@@ -2026,7 +2365,10 @@ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):
 
     if use_arrow:
         # pyarrow cannot decode column name with incorrect encoding
-        with pytest.raises(UnicodeDecodeError):
+        with pytest.raises(
+            DataSourceError,
+            match="The file being read is not encoded in UTF-8; please use_arrow=False",
+        ):
             read_dataframe(output_path, use_arrow=True)
     else:
         bad = read_dataframe(output_path, use_arrow=False)
@@ -2045,7 +2387,8 @@ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):
 
 
 def test_encoding_read_option_collision_shapefile(naturalearth_lowres, use_arrow):
-    """Providing both encoding parameter and ENCODING open option (even if blank) is not allowed"""
+    """Providing both encoding parameter and ENCODING open option
+    (even if blank) is not allowed."""
 
     with pytest.raises(
         ValueError, match='cannot provide both encoding parameter and "ENCODING" option'
@@ -2056,7 +2399,8 @@ def test_encoding_read_option_collision_shapefile(naturalearth_lowres, use_arrow
 
 
 def test_encoding_write_layer_option_collision_shapefile(tmp_path, encoded_text):
-    """Providing both encoding parameter and ENCODING layer creation option (even if blank) is not allowed"""
+    """Providing both encoding parameter and ENCODING layer creation option
+    (even if blank) is not allowed."""
     encoding, text = encoded_text
 
     output_path = tmp_path / "test.shp"
@@ -2064,7 +2408,10 @@ def test_encoding_write_layer_option_collision_shapefile(tmp_path, encoded_text)
 
     with pytest.raises(
         ValueError,
-        match='cannot provide both encoding parameter and "ENCODING" layer creation option',
+        match=(
+            'cannot provide both encoding parameter and "ENCODING" layer creation '
+            "option"
+        ),
     ):
         write_dataframe(
             df, output_path, encoding=encoding, layer_options={"ENCODING": ""}
@@ -2102,7 +2449,8 @@ def test_non_utf8_encoding_shapefile_sql(tmp_path, use_arrow):
 
 @pytest.mark.requires_arrow_write_api
 def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
-    # confirm KML coordinates are written in lon, lat order even if CRS axis specifies otherwise
+    # confirm KML coordinates are written in lon, lat order even if CRS axis
+    # specifies otherwise
     points = [Point(10, 20), Point(30, 40), Point(50, 60)]
     gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
     output_path = tmp_path / "test.kml"
@@ -2117,7 +2465,7 @@ def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
     if "LIBKML" in list_drivers():
         # test appending to the existing file only if LIBKML is available
        # as it appears to fall back on LIBKML driver when appending.
-        points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
+        points_append = [Point(7, 8), Point(9, 10), Point(11, 12)]
         gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
 
         write_dataframe(