pyogrio 0.10.0-cp311-cp311-macosx_12_0_arm64.whl → 0.11.0-cp311-cp311-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)
  1. pyogrio/.dylibs/{libgdal.35.3.9.1.dylib → libgdal.36.3.10.3.dylib} +0 -0
  2. pyogrio/__init__.py +9 -9
  3. pyogrio/_compat.py +8 -0
  4. pyogrio/_err.cpython-311-darwin.so +0 -0
  5. pyogrio/_geometry.cpython-311-darwin.so +0 -0
  6. pyogrio/_io.cpython-311-darwin.so +0 -0
  7. pyogrio/_ogr.cpython-311-darwin.so +0 -0
  8. pyogrio/_version.py +3 -3
  9. pyogrio/_vsi.cpython-311-darwin.so +0 -0
  10. pyogrio/gdal_data/GDAL-targets-release.cmake +3 -3
  11. pyogrio/gdal_data/GDAL-targets.cmake +2 -2
  12. pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
  13. pyogrio/gdal_data/gdalinfo_output.schema.json +3 -3
  14. pyogrio/gdal_data/gdaltileindex.xsd +1 -17
  15. pyogrio/gdal_data/gdalvrt.xsd +48 -41
  16. pyogrio/gdal_data/nitf_spec.xml +1 -17
  17. pyogrio/gdal_data/nitf_spec.xsd +1 -17
  18. pyogrio/gdal_data/ogrvrt.xsd +1 -17
  19. pyogrio/gdal_data/osmconf.ini +3 -1
  20. pyogrio/gdal_data/pdfcomposition.xsd +1 -17
  21. pyogrio/gdal_data/template_tiles.mapml +28 -0
  22. pyogrio/gdal_data/vcpkg.spdx.json +28 -23
  23. pyogrio/gdal_data/vcpkg_abi_info.txt +28 -27
  24. pyogrio/gdal_data/vdv452.xml +1 -17
  25. pyogrio/gdal_data/vdv452.xsd +1 -17
  26. pyogrio/geopandas.py +91 -43
  27. pyogrio/proj_data/ITRF2014 +1 -1
  28. pyogrio/proj_data/ITRF2020 +91 -0
  29. pyogrio/proj_data/proj-config-version.cmake +3 -3
  30. pyogrio/proj_data/proj-config.cmake +1 -1
  31. pyogrio/proj_data/proj-targets.cmake +3 -3
  32. pyogrio/proj_data/proj.db +0 -0
  33. pyogrio/proj_data/proj.ini +11 -3
  34. pyogrio/proj_data/proj4-targets.cmake +3 -3
  35. pyogrio/proj_data/usage +7 -2
  36. pyogrio/proj_data/vcpkg.spdx.json +27 -22
  37. pyogrio/proj_data/vcpkg_abi_info.txt +16 -15
  38. pyogrio/tests/conftest.py +8 -0
  39. pyogrio/tests/test_arrow.py +3 -0
  40. pyogrio/tests/test_core.py +8 -4
  41. pyogrio/tests/test_geopandas_io.py +250 -42
  42. pyogrio/tests/test_path.py +10 -0
  43. pyogrio/tests/test_raw_io.py +6 -2
  44. pyogrio/util.py +15 -2
  45. {pyogrio-0.10.0.dist-info → pyogrio-0.11.0.dist-info}/METADATA +32 -37
  46. {pyogrio-0.10.0.dist-info → pyogrio-0.11.0.dist-info}/RECORD +49 -47
  47. {pyogrio-0.10.0.dist-info → pyogrio-0.11.0.dist-info}/WHEEL +2 -1
  48. {pyogrio-0.10.0.dist-info → pyogrio-0.11.0.dist-info/licenses}/LICENSE +0 -0
  49. {pyogrio-0.10.0.dist-info → pyogrio-0.11.0.dist-info}/top_level.txt +0 -0
pyogrio/tests/test_geopandas_io.py CHANGED
@@ -12,10 +12,20 @@ from pyogrio import (
     list_drivers,
     list_layers,
     read_info,
+    set_gdal_config_options,
     vsi_listtree,
     vsi_unlink,
 )
-from pyogrio._compat import HAS_ARROW_WRITE_API, HAS_PYPROJ, PANDAS_GE_15
+from pyogrio._compat import (
+    GDAL_GE_37,
+    GDAL_GE_311,
+    GDAL_GE_352,
+    HAS_ARROW_WRITE_API,
+    HAS_PYPROJ,
+    PANDAS_GE_15,
+    PANDAS_GE_30,
+    SHAPELY_GE_21,
+)
 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
 from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe
 from pyogrio.raw import (
@@ -93,8 +103,20 @@ def spatialite_available(path):
     return False
 
 
-@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
-def test_read_csv_encoding(tmp_path, encoding):
+@pytest.mark.parametrize(
+    "encoding, arrow",
+    [
+        ("utf-8", False),
+        pytest.param("utf-8", True, marks=requires_pyarrow_api),
+        ("cp1252", False),
+        (None, False),
+    ],
+)
+def test_read_csv_encoding(tmp_path, encoding, arrow):
+    """Test reading CSV files with different encodings.
+
+    Arrow only supports utf-8 encoding.
+    """
     # Write csv test file. Depending on the os this will be written in a different
     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
     csv_path = tmp_path / "test.csv"
@@ -105,7 +127,7 @@ def test_read_csv_encoding(tmp_path, encoding):
     # Read csv. The data should be read with the same default encoding as the csv file
     # was written in, but should have been converted to utf-8 in the dataframe returned.
     # Hence, the asserts below, with strings in utf-8, should be OK.
-    df = read_dataframe(csv_path, encoding=encoding)
+    df = read_dataframe(csv_path, encoding=encoding, use_arrow=arrow)
 
     assert len(df) == 1
     assert df.columns.tolist() == ["näme", "city"]
@@ -117,19 +139,29 @@ def test_read_csv_encoding(tmp_path, encoding):
     locale.getpreferredencoding().upper() == "UTF-8",
     reason="test requires non-UTF-8 default platform",
 )
-def test_read_csv_platform_encoding(tmp_path):
-    """verify that read defaults to platform encoding; only works on Windows (CP1252)"""
+def test_read_csv_platform_encoding(tmp_path, use_arrow):
+    """Verify that read defaults to platform encoding; only works on Windows (CP1252).
+
+    When use_arrow=True, reading a non-UTF-8 file fails.
+    """
     csv_path = tmp_path / "test.csv"
     with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
         csv.write("näme,city\n")
         csv.write("Wilhelm Röntgen,Zürich\n")
 
-    df = read_dataframe(csv_path)
+    if use_arrow:
+        with pytest.raises(
+            DataSourceError,
+            match="; please use_arrow=False",
+        ):
+            df = read_dataframe(csv_path, use_arrow=use_arrow)
+    else:
+        df = read_dataframe(csv_path, use_arrow=use_arrow)
 
-    assert len(df) == 1
-    assert df.columns.tolist() == ["näme", "city"]
-    assert df.city.tolist() == ["Zürich"]
-    assert df.näme.tolist() == ["Wilhelm Röntgen"]
+        assert len(df) == 1
+        assert df.columns.tolist() == ["näme", "city"]
+        assert df.city.tolist() == ["Zürich"]
+        assert df.näme.tolist() == ["Wilhelm Röntgen"]
 
 
 def test_read_dataframe(naturalearth_lowres_all_ext):
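Note that the Arrow read path now raises pyogrio's DataSourceError (with a hint to retry with use_arrow=False) instead of a low-level decode error. A minimal sketch of the fallback a caller might write; the helper name and the cp1252 fallback encoding are illustrative assumptions, not pyogrio API:

# Sketch: fall back to the classic reader for files Arrow cannot decode.
from pyogrio import read_dataframe
from pyogrio.errors import DataSourceError

def read_any_encoding(path, fallback_encoding="cp1252"):
    try:
        # Fast path: Arrow-based reading handles UTF-8 data only.
        return read_dataframe(path, use_arrow=True)
    except DataSourceError:
        # Arrow could not decode the file; retry without Arrow, which
        # honors the `encoding` keyword (or the platform default).
        return read_dataframe(path, use_arrow=False, encoding=fallback_encoding)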
@@ -227,11 +259,32 @@ def test_read_force_2d(tmp_path, use_arrow):
     assert not df.iloc[0].geometry.has_z
 
 
+@pytest.mark.skipif(
+    not GDAL_GE_352,
+    reason="gdal >= 3.5.2 needed to use OGR_GEOJSON_MAX_OBJ_SIZE with a float value",
+)
+def test_read_geojson_error(naturalearth_lowres_geojson, use_arrow):
+    try:
+        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01})
+        with pytest.raises(
+            DataSourceError,
+            match="Failed to read GeoJSON data; .* GeoJSON object too complex",
+        ):
+            read_dataframe(naturalearth_lowres_geojson, use_arrow=use_arrow)
+    finally:
+        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": None})
+
+
 def test_read_layer(tmp_path, use_arrow):
     filename = tmp_path / "test.gpkg"
 
     # create a multilayer GPKG
     expected1 = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
+    if use_arrow:
+        # TODO this needs to be fixed on the geopandas side (to ensure the
+        # GeoDataFrame() constructor does this); with use_arrow we already
+        # get a columns Index with string dtype
+        expected1.columns = expected1.columns.astype("str")
     write_dataframe(
         expected1,
         filename,
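The try/finally pattern in the new test above, setting a GDAL config option and then clearing it by passing None, is how an option is scoped to a block of code. A small context-manager wrapper could package the same idea; the gdal_config helper below is illustrative, not a pyogrio API:

from contextlib import contextmanager
from pyogrio import set_gdal_config_options

@contextmanager
def gdal_config(options):
    """Temporarily apply GDAL config options, clearing them on exit."""
    try:
        set_gdal_config_options(options)
        yield
    finally:
        # Passing None for a key clears that option again.
        set_gdal_config_options({key: None for key in options})

# e.g. force the "GeoJSON object too complex" error for any sizable file:
# with gdal_config({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01}):
#     read_dataframe("file.geojson")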
@@ -239,6 +292,8 @@ def test_read_layer(tmp_path, use_arrow):
     )
 
     expected2 = gp.GeoDataFrame(geometry=[Point(1, 1)], crs="EPSG:4326")
+    if use_arrow:
+        expected2.columns = expected2.columns.astype("str")
     write_dataframe(expected2, filename, layer="layer2", append=True)
 
     assert np.array_equal(
@@ -361,7 +416,7 @@ def test_read_null_values(tmp_path, use_arrow):
     df = read_dataframe(filename, use_arrow=use_arrow, read_geometry=False)
 
     # make sure that Null values are preserved
-    assert np.array_equal(df.col.values, expected.col.values)
+    assert df["col"].isna().all()
 
 
 def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow):
@@ -438,10 +493,17 @@ def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
     if use_arrow and naturalearth_lowres_all_ext.suffix == ".gpkg":
         # https://github.com/OSGeo/gdal/issues/8492
         request.node.add_marker(pytest.mark.xfail(reason="GDAL doesn't error for GPKG"))
-    with pytest.raises(ValueError, match="Invalid SQL"):
-        read_dataframe(
-            naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
-        )
+
+    if naturalearth_lowres_all_ext.suffix == ".gpkg" and __gdal_version__ >= (3, 11, 0):
+        with pytest.raises(DataLayerError, match="no such column"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+            )
+    else:
+        with pytest.raises(ValueError, match="Invalid SQL"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+            )
 
 
 def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
@@ -675,6 +737,13 @@ def test_read_skip_features(naturalearth_lowres_all_ext, use_arrow, skip_features):
     # In .geojsonl the vertices are reordered, so normalize
     is_jsons = ext == ".geojsonl"
 
+    if skip_features == 200 and not use_arrow:
+        # result is an empty dataframe, so no proper dtype inference happens
+        # for the numpy object dtype arrays
+        df[["continent", "name", "iso_a3"]] = df[
+            ["continent", "name", "iso_a3"]
+        ].astype("str")
+
     assert_geodataframe_equal(
         df,
         expected,
@@ -943,9 +1012,20 @@ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
     assert df.iloc[0].geometry.area > area_canada
 
 
-@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
-def test_write_csv_encoding(tmp_path, encoding):
-    """Test if write_dataframe uses the default encoding correctly."""
+@pytest.mark.parametrize(
+    "encoding, arrow",
+    [
+        ("utf-8", False),
+        pytest.param("utf-8", True, marks=requires_arrow_write_api),
+        ("cp1252", False),
+        (None, False),
+    ],
+)
+def test_write_csv_encoding(tmp_path, encoding, arrow):
+    """Test if write_dataframe uses the default encoding correctly.
+
+    Arrow only supports utf-8 encoding.
+    """
     # Write csv test file. Depending on the os this will be written in a different
     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
     csv_path = tmp_path / "test.csv"
@@ -958,7 +1038,7 @@ def test_write_csv_encoding(tmp_path, encoding):
     # same encoding as above.
     df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
     csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
-    write_dataframe(df, csv_pyogrio_path, encoding=encoding)
+    write_dataframe(df, csv_pyogrio_path, encoding=encoding, use_arrow=arrow)
 
     # Check if the text files written both ways can be read again and give same result.
     with open(csv_path, encoding=encoding) as csv:
@@ -976,6 +1056,48 @@ def test_write_csv_encoding(tmp_path, encoding):
     assert csv_bytes == csv_pyogrio_bytes
 
 
+@pytest.mark.parametrize(
+    "ext, fid_column, fid_param_value",
+    [
+        (".gpkg", "fid", None),
+        (".gpkg", "FID", None),
+        (".sqlite", "ogc_fid", None),
+        (".gpkg", "fid_custom", "fid_custom"),
+        (".gpkg", "FID_custom", "fid_custom"),
+        (".sqlite", "ogc_fid_custom", "ogc_fid_custom"),
+    ],
+)
+@pytest.mark.requires_arrow_write_api
+def test_write_custom_fids(tmp_path, ext, fid_column, fid_param_value, use_arrow):
+    """Test specifying the FIDs to save when writing to a file.
+
+    Saving custom FIDs is only supported for formats that actually store the FID,
+    e.g. GPKG and SQLite. The fid_column name check is case-insensitive.
+
+    Typically, GDAL supports using a custom FID column for these file formats via a
+    `FID` layer creation option, which is also tested here. If `fid_param_value` is
+    specified (not None), an `fid` parameter is passed to `write_dataframe`, causing
+    GDAL to use the column name specified for the FID.
+    """
+    input_gdf = gp.GeoDataFrame(
+        {fid_column: [5]}, geometry=[shapely.Point(0, 0)], crs="epsg:4326"
+    )
+    kwargs = {}
+    if fid_param_value is not None:
+        kwargs["fid"] = fid_param_value
+    path = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, path, use_arrow=use_arrow, **kwargs)
+
+    assert path.exists()
+    output_gdf = read_dataframe(path, fid_as_index=True, use_arrow=use_arrow)
+    output_gdf = output_gdf.reset_index()
+
+    # pyogrio always sets "fid" as index name with `fid_as_index`
+    expected_gdf = input_gdf.rename(columns={fid_column: "fid"})
+    assert_geodataframe_equal(output_gdf, expected_gdf)
+
+
 @pytest.mark.parametrize("ext", ALL_EXTS)
 @pytest.mark.requires_arrow_write_api
 def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
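In user terms, the new behavior this test exercises is a round trip of explicit feature IDs. A minimal sketch, assuming a GPKG target; the column name, IDs, and output path are illustrative:

import geopandas as gp
import shapely
from pyogrio import read_dataframe, write_dataframe

gdf = gp.GeoDataFrame(
    {"my_fid": [10, 20]},
    geometry=[shapely.Point(0, 0), shapely.Point(1, 1)],
    crs="EPSG:4326",
)
# The extra `fid` keyword is forwarded to GDAL as the FID layer creation
# option, so the "my_fid" column is stored as the feature ID.
write_dataframe(gdf, "data.gpkg", fid="my_fid")

# Reading with fid_as_index=True exposes the stored FIDs as the index.
result = read_dataframe("data.gpkg", fid_as_index=True)
assert list(result.index) == [10, 20]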
@@ -1087,16 +1209,38 @@ def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):
 
 
 @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
+@pytest.mark.parametrize(
+    "columns, dtype",
+    [
+        ([], None),
+        (["col_int"], np.int64),
+        (["col_float"], np.float64),
+        (["col_object"], object),
+    ],
+)
 @pytest.mark.requires_arrow_write_api
-def test_write_empty_dataframe(tmp_path, ext, use_arrow):
-    expected = gp.GeoDataFrame(geometry=[], crs=4326)
+def test_write_empty_dataframe(tmp_path, ext, columns, dtype, use_arrow):
+    """Test writing a dataframe with no rows.
+
+    With use_arrow, object type columns with no rows are converted to null type
+    columns by pyarrow, but null columns are not supported by GDAL. Added to test
+    the fix for #513.
+    """
+    expected = gp.GeoDataFrame(geometry=[], columns=columns, dtype=dtype, crs=4326)
     filename = tmp_path / f"test{ext}"
     write_dataframe(expected, filename, use_arrow=use_arrow)
 
     assert filename.exists()
-    df = read_dataframe(filename)
-    assert_geodataframe_equal(df, expected)
+    df = read_dataframe(filename, use_arrow=use_arrow)
+
+    # Check result
+    # For older pandas versions, the index is created as Object dtype but read as
+    # RangeIndex, so don't check the index dtype in that case.
+    check_index_type = True if PANDAS_GE_20 else False
+    # with pandas 3+ and reading through arrow, we preserve the string dtype
+    # (no proper dtype inference happens for the empty numpy object dtype arrays)
+    if use_arrow and dtype is object:
+        expected["col_object"] = expected["col_object"].astype("str")
+    assert_geodataframe_equal(df, expected, check_index_type=check_index_type)
 
 
 def test_write_empty_geometry(tmp_path):
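The pyarrow behavior the docstring refers to is easy to reproduce directly: an empty or all-None object column gives Arrow nothing to infer a type from, so it becomes the null type, which GDAL cannot store. A quick illustration; requesting an explicit string type is one plausible way to keep the nulls while producing a storable column:

import pandas as pd
import pyarrow as pa

print(pa.Array.from_pandas(pd.Series([], dtype=object)).type)      # null
print(pa.Array.from_pandas(pd.Series([None], dtype=object)).type)  # null

# An explicit string type keeps the nulls but is a type GDAL can store.
print(pa.array([None], type=pa.string()).type)  # string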
@@ -1116,6 +1260,28 @@ def test_write_empty_geometry(tmp_path):
     assert_geodataframe_equal(df, expected)
 
 
+@pytest.mark.requires_arrow_write_api
+def test_write_None_string_column(tmp_path, use_arrow):
+    """Test pandas object columns with all None values.
+
+    With use_arrow, such columns are converted to null type columns by pyarrow,
+    but null columns are not supported by GDAL. Added to test the fix for #513.
+    """
+    gdf = gp.GeoDataFrame({"object_col": [None]}, geometry=[Point(0, 0)], crs=4326)
+    filename = tmp_path / "test.gpkg"
+
+    write_dataframe(gdf, filename, use_arrow=use_arrow)
+    assert filename.exists()
+
+    result_gdf = read_dataframe(filename, use_arrow=use_arrow)
+    if PANDAS_GE_30 and use_arrow:
+        assert result_gdf.object_col.dtype == "str"
+        gdf["object_col"] = gdf["object_col"].astype("str")
+    else:
+        assert result_gdf.object_col.dtype == object
+    assert_geodataframe_equal(result_gdf, gdf)
+
+
 @pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
 @pytest.mark.requires_arrow_write_api
 def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
@@ -1521,6 +1687,30 @@ def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
     assert df.crs.equals(expected.crs)
 
 
+@pytest.mark.parametrize("ext", [".gpkg.zip", ".shp.zip", ".shz"])
+@pytest.mark.requires_arrow_write_api
+def test_write_read_zipped_ext(tmp_path, naturalearth_lowres, ext, use_arrow):
+    """Run a basic read and write test on some extra (zipped) extensions."""
+    if ext == ".gpkg.zip" and not GDAL_GE_37:
+        pytest.skip(".gpkg.zip support requires GDAL >= 3.7")
+
+    input_gdf = read_dataframe(naturalearth_lowres)
+    output_path = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
+
+    assert output_path.exists()
+    result_gdf = read_dataframe(output_path)
+
+    geometry_types = result_gdf.geometry.type.unique()
+    if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
+        assert list(geometry_types) == ["MultiPolygon"]
+    else:
+        assert set(geometry_types) == {"MultiPolygon", "Polygon"}
+
+    assert_geodataframe_equal(result_gdf, input_gdf, check_index_type=False)
+
+
 def test_write_read_mixed_column_values(tmp_path):
     # use_arrow=True is tested separately below
     mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
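The zipped single-file formats covered by the new test above work end to end. A minimal round trip, assuming an illustrative output path (and GDAL >= 3.7 for .gpkg.zip):

import geopandas as gp
from shapely.geometry import Point
from pyogrio import read_dataframe, write_dataframe

gdf = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
write_dataframe(gdf, "data.shp.zip")  # GDAL zips the shapefile sidecars itself
roundtrip = read_dataframe("data.shp.zip")
assert len(roundtrip) == 1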
@@ -1532,11 +1722,13 @@ def test_write_read_mixed_column_values(tmp_path):
     write_dataframe(test_gdf, output_path)
     output_gdf = read_dataframe(output_path)
     assert len(test_gdf) == len(output_gdf)
-    for idx, value in enumerate(mixed_values):
-        if value in (None, np.nan):
-            assert output_gdf["mixed"][idx] is None
-        else:
-            assert output_gdf["mixed"][idx] == str(value)
+    # mixed values as object dtype are currently written as strings
+    # (but preserving nulls)
+    expected = pd.Series(
+        [str(value) if value not in (None, np.nan) else None for value in mixed_values],
+        name="mixed",
+    )
+    assert_series_equal(output_gdf["mixed"], expected)
 
 
 @requires_arrow_write_api
@@ -1569,8 +1761,8 @@ def test_write_read_null(tmp_path, use_arrow):
     assert pd.isna(result_gdf["float64"][1])
     assert pd.isna(result_gdf["float64"][2])
     assert result_gdf["object_str"][0] == "test"
-    assert result_gdf["object_str"][1] is None
-    assert result_gdf["object_str"][2] is None
+    assert pd.isna(result_gdf["object_str"][1])
+    assert pd.isna(result_gdf["object_str"][2])
 
 
 @pytest.mark.requires_arrow_write_api
@@ -1714,23 +1906,29 @@ def test_write_geometry_z_types_auto(
 
 
 @pytest.mark.parametrize(
-    "on_invalid, message",
+    "on_invalid, message, expected_wkt",
     [
         (
             "warn",
             "Invalid WKB: geometry is returned as None. IllegalArgumentException: "
-            "Invalid number of points in LinearRing found 2 - must be 0 or >=",
+            "Points of LinearRing do not form a closed linestring",
+            None,
         ),
-        ("raise", "Invalid number of points in LinearRing found 2 - must be 0 or >="),
-        ("ignore", None),
+        ("raise", "Points of LinearRing do not form a closed linestring", None),
+        ("ignore", None, None),
+        ("fix", None, "POLYGON ((0 0, 0 1, 0 0))"),
     ],
 )
-def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
+@pytest.mark.filterwarnings("ignore:Non closed ring detected:RuntimeWarning")
+def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message, expected_wkt):
+    if on_invalid == "fix" and not SHAPELY_GE_21:
+        pytest.skip("on_invalid=fix not available for Shapely < 2.1")
+
     if on_invalid == "raise":
         handler = pytest.raises(shapely.errors.GEOSException, match=message)
     elif on_invalid == "warn":
         handler = pytest.warns(match=message)
-    elif on_invalid == "ignore":
+    elif on_invalid in ("fix", "ignore"):
         handler = contextlib.nullcontext()
     else:
         raise ValueError(f"unknown value for on_invalid: {on_invalid}")
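The new "fix" mode delegates repair to Shapely >= 2.1 instead of returning None for invalid geometries. Against the unclosed two-point ring written in the next hunk, it is expected to behave roughly like this sketch (the path is illustrative):

# Sketch: repair invalid WKB instead of warning, raising, or dropping it.
from pyogrio import read_dataframe

df = read_dataframe(
    "invalid_ring.geojson",  # the GeoJSON with the unclosed ring from this test
    on_invalid="fix",        # requires shapely >= 2.1
)
assert df.geometry.iloc[0].wkt == "POLYGON ((0 0, 0 1, 0 0))"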
@@ -1744,7 +1942,7 @@ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
         "properties": {},
         "geometry": {
           "type": "Polygon",
-          "coordinates": [ [ [0, 0], [0, 0] ] ]
+          "coordinates": [ [ [0, 0], [0, 1] ] ]
         }
       }
     ]
@@ -1760,7 +1958,10 @@ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
         use_arrow=use_arrow,
         on_invalid=on_invalid,
     )
-    df.geometry.isnull().all()
+    if expected_wkt is None:
+        assert df.geometry.iloc[0] is None
+    else:
+        assert df.geometry.iloc[0].wkt == expected_wkt
 
 
 def test_read_multisurface(multisurface_file, use_arrow):
@@ -1792,6 +1993,10 @@ def test_read_dataset_kwargs(nested_geojson_file, use_arrow):
         geometry=[shapely.Point(0, 0)],
         crs="EPSG:4326",
     )
+    if GDAL_GE_311 and use_arrow:
+        # GDAL 3.11 started to use the json extension type, which is not yet handled
+        # correctly in the arrow->pandas conversion (using object instead of str dtype)
+        expected["intermediate_level"] = expected["intermediate_level"].astype(object)
 
     assert_geodataframe_equal(df, expected)
 
@@ -1837,7 +2042,7 @@ def test_write_nullable_dtypes(tmp_path, use_arrow):
     expected["col2"] = expected["col2"].astype("float64")
     expected["col3"] = expected["col3"].astype("float32")
     expected["col4"] = expected["col4"].astype("float64")
-    expected["col5"] = expected["col5"].astype(object)
+    expected["col5"] = expected["col5"].astype("str")
     expected.loc[1, "col5"] = None  # pandas converts to pd.NA on line above
     assert_geodataframe_equal(output_gdf, expected)
 
@@ -2160,7 +2365,10 @@ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):
 
     if use_arrow:
         # pyarrow cannot decode column name with incorrect encoding
-        with pytest.raises(UnicodeDecodeError):
+        with pytest.raises(
+            DataSourceError,
+            match="The file being read is not encoded in UTF-8; please use_arrow=False",
+        ):
             read_dataframe(output_path, use_arrow=True)
     else:
         bad = read_dataframe(output_path, use_arrow=False)
@@ -2257,7 +2465,7 @@ def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
     if "LIBKML" in list_drivers():
         # test appending to the existing file only if LIBKML is available
         # as it appears to fall back on LIBKML driver when appending.
-        points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
+        points_append = [Point(7, 8), Point(9, 10), Point(11, 12)]
         gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
 
         write_dataframe(
pyogrio/tests/test_path.py CHANGED
@@ -33,10 +33,20 @@ def change_cwd(path):
     [
         # local file paths that should be passed through as is
        ("data.gpkg", "data.gpkg"),
+        ("data.gpkg.zip", "data.gpkg.zip"),
+        ("data.shp.zip", "data.shp.zip"),
         (Path("data.gpkg"), "data.gpkg"),
+        (Path("data.gpkg.zip"), "data.gpkg.zip"),
+        (Path("data.shp.zip"), "data.shp.zip"),
         ("/home/user/data.gpkg", "/home/user/data.gpkg"),
+        ("/home/user/data.gpkg.zip", "/home/user/data.gpkg.zip"),
+        ("/home/user/data.shp.zip", "/home/user/data.shp.zip"),
         (r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"),
+        (r"C:\User\Documents\data.gpkg.zip", r"C:\User\Documents\data.gpkg.zip"),
+        (r"C:\User\Documents\data.shp.zip", r"C:\User\Documents\data.shp.zip"),
         ("file:///home/user/data.gpkg", "/home/user/data.gpkg"),
+        ("file:///home/user/data.gpkg.zip", "/home/user/data.gpkg.zip"),
+        ("file:///home/user/data.shp.zip", "/home/user/data.shp.zip"),
         ("/home/folder # with hash/data.gpkg", "/home/folder # with hash/data.gpkg"),
         # cloud URIs
         ("https://testing/data.gpkg", "/vsicurl/https://testing/data.gpkg"),
pyogrio/tests/test_raw_io.py CHANGED
@@ -17,7 +17,7 @@ from pyogrio import (
     read_info,
     set_gdal_config_options,
 )
-from pyogrio._compat import HAS_PYARROW, HAS_SHAPELY
+from pyogrio._compat import GDAL_GE_37, HAS_PYARROW, HAS_SHAPELY
 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError
 from pyogrio.raw import open_arrow, read, write
 from pyogrio.tests.conftest import (
@@ -63,9 +63,10 @@ def test_read(naturalearth_lowres):
 @pytest.mark.parametrize("ext", DRIVERS)
 def test_read_autodetect_driver(tmp_path, naturalearth_lowres, ext):
     # Test all supported autodetect drivers
+    if ext == ".gpkg.zip" and not GDAL_GE_37:
+        pytest.skip(".gpkg.zip not supported for gdal < 3.7.0")
     testfile = prepare_testfile(naturalearth_lowres, dst_dir=tmp_path, ext=ext)
 
-    assert testfile.suffix == ext
     assert testfile.exists()
     meta, _, geometry, fields = read(testfile)
 
@@ -703,6 +704,9 @@ def test_write_append(tmp_path, naturalearth_lowres, ext):
     if ext in (".geojsonl", ".geojsons") and __gdal_version__ < (3, 6, 0):
         pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")
 
+    if ext == ".gpkg.zip":
+        pytest.skip("Append to .gpkg.zip is not supported")
+
     meta, _, geometry, field_data = read(naturalearth_lowres)
 
     # coerce output layer to MultiPolygon to avoid mixed type errors
pyogrio/util.py CHANGED
@@ -9,6 +9,8 @@ from urllib.parse import urlparse
 
 from pyogrio._vsi import vsimem_rmtree_toplevel as _vsimem_rmtree_toplevel
 
+MULTI_EXTENSIONS = (".gpkg.zip", ".shp.zip")
+
 
 def get_vsi_path_or_buffer(path_or_buffer):
     """Get VSI-prefixed path or bytes buffer depending on type of path_or_buffer.
@@ -68,15 +70,23 @@ def vsi_path(path: Union[str, Path]) -> str:
     # Windows drive letters (e.g. "C:\") confuse `urlparse` as they look like
     # URL schemes
     if sys.platform == "win32" and re.match("^[a-zA-Z]\\:", path):
+        # If it is not a zip file, or it is a multi-extension zip file that is
+        # directly supported by a GDAL driver, return the path as is.
         if not path.split("!")[0].endswith(".zip"):
             return path
+        if path.split("!")[0].endswith(MULTI_EXTENSIONS):
+            return path
 
         # prefix then allow to proceed with remaining parsing
         path = f"zip://{path}"
 
     path, archive, scheme = _parse_uri(path)
 
-    if scheme or archive or path.endswith(".zip"):
+    if (
+        scheme
+        or archive
+        or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS))
+    ):
         return _construct_vsi_path(path, archive, scheme)
 
     return path
@@ -146,7 +156,10 @@ def _construct_vsi_path(path, archive, scheme) -> str:
     suffix = ""
     schemes = scheme.split("+")
 
-    if "zip" not in schemes and (archive.endswith(".zip") or path.endswith(".zip")):
+    if "zip" not in schemes and (
+        archive.endswith(".zip")
+        or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS))
+    ):
         schemes.insert(0, "zip")
 
     if schemes:
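Net effect of the MULTI_EXTENSIONS checks: generic .zip archives still get the /vsizip/ prefix, while zipped formats GDAL reads natively pass through unchanged. A rough sketch of the intended mapping (vsi_path is an internal helper; the pass-through cases are asserted in test_path.py above, and the /vsizip/ case assumes the pre-existing behavior for plain zip archives):

from pyogrio.util import vsi_path

assert vsi_path("data.zip") == "/vsizip/data.zip"    # generic zip archive
assert vsi_path("data.gpkg.zip") == "data.gpkg.zip"  # GDAL-native, untouched
assert vsi_path("data.shp.zip") == "data.shp.zip"    # GDAL-native, untouched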