pyogrio 0.10.0__cp313-cp313-manylinux_2_28_aarch64.whl → 0.11.1__cp313-cp313-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of pyogrio has been flagged as potentially problematic.

Files changed (49)
  1. pyogrio/__init__.py +12 -10
  2. pyogrio/_compat.py +8 -0
  3. pyogrio/_err.cpython-313-aarch64-linux-gnu.so +0 -0
  4. pyogrio/_geometry.cpython-313-aarch64-linux-gnu.so +0 -0
  5. pyogrio/_io.cpython-313-aarch64-linux-gnu.so +0 -0
  6. pyogrio/_ogr.cpython-313-aarch64-linux-gnu.so +0 -0
  7. pyogrio/_version.py +3 -3
  8. pyogrio/_vsi.cpython-313-aarch64-linux-gnu.so +0 -0
  9. pyogrio/gdal_data/GDAL-targets-release.cmake +3 -3
  10. pyogrio/gdal_data/GDAL-targets.cmake +2 -2
  11. pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
  12. pyogrio/gdal_data/gdalinfo_output.schema.json +3 -3
  13. pyogrio/gdal_data/gdaltileindex.xsd +1 -17
  14. pyogrio/gdal_data/gdalvrt.xsd +48 -41
  15. pyogrio/gdal_data/nitf_spec.xml +1 -17
  16. pyogrio/gdal_data/nitf_spec.xsd +1 -17
  17. pyogrio/gdal_data/ogrvrt.xsd +1 -17
  18. pyogrio/gdal_data/osmconf.ini +3 -1
  19. pyogrio/gdal_data/pdfcomposition.xsd +1 -17
  20. pyogrio/gdal_data/template_tiles.mapml +28 -0
  21. pyogrio/gdal_data/vcpkg.spdx.json +32 -27
  22. pyogrio/gdal_data/vcpkg_abi_info.txt +27 -26
  23. pyogrio/gdal_data/vdv452.xml +1 -17
  24. pyogrio/gdal_data/vdv452.xsd +1 -17
  25. pyogrio/geopandas.py +91 -43
  26. pyogrio/proj_data/ITRF2014 +1 -1
  27. pyogrio/proj_data/ITRF2020 +91 -0
  28. pyogrio/proj_data/proj-config-version.cmake +3 -3
  29. pyogrio/proj_data/proj-config.cmake +1 -1
  30. pyogrio/proj_data/proj-targets.cmake +3 -3
  31. pyogrio/proj_data/proj.db +0 -0
  32. pyogrio/proj_data/proj.ini +11 -3
  33. pyogrio/proj_data/proj4-targets.cmake +3 -3
  34. pyogrio/proj_data/usage +7 -2
  35. pyogrio/proj_data/vcpkg.spdx.json +27 -22
  36. pyogrio/proj_data/vcpkg_abi_info.txt +15 -14
  37. pyogrio/tests/conftest.py +8 -0
  38. pyogrio/tests/test_arrow.py +3 -0
  39. pyogrio/tests/test_core.py +8 -4
  40. pyogrio/tests/test_geopandas_io.py +270 -45
  41. pyogrio/tests/test_path.py +10 -0
  42. pyogrio/tests/test_raw_io.py +6 -2
  43. pyogrio/util.py +15 -2
  44. {pyogrio-0.10.0.dist-info → pyogrio-0.11.1.dist-info}/METADATA +32 -37
  45. {pyogrio-0.10.0.dist-info → pyogrio-0.11.1.dist-info}/RECORD +202 -200
  46. {pyogrio-0.10.0.dist-info → pyogrio-0.11.1.dist-info}/WHEEL +1 -1
  47. pyogrio.libs/{libgdal-b0847c7b.so.35.3.9.1 → libgdal-3af0c888.so.36.3.10.3} +0 -0
  48. {pyogrio-0.10.0.dist-info → pyogrio-0.11.1.dist-info/licenses}/LICENSE +0 -0
  49. {pyogrio-0.10.0.dist-info → pyogrio-0.11.1.dist-info}/top_level.txt +0 -0
pyogrio/tests/test_geopandas_io.py CHANGED
@@ -12,10 +12,20 @@ from pyogrio import (
     list_drivers,
     list_layers,
     read_info,
+    set_gdal_config_options,
     vsi_listtree,
     vsi_unlink,
 )
-from pyogrio._compat import HAS_ARROW_WRITE_API, HAS_PYPROJ, PANDAS_GE_15
+from pyogrio._compat import (
+    GDAL_GE_37,
+    GDAL_GE_311,
+    GDAL_GE_352,
+    HAS_ARROW_WRITE_API,
+    HAS_PYPROJ,
+    PANDAS_GE_15,
+    PANDAS_GE_30,
+    SHAPELY_GE_21,
+)
 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
 from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe
 from pyogrio.raw import (
@@ -93,8 +103,20 @@ def spatialite_available(path):
         return False
 
 
-@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
-def test_read_csv_encoding(tmp_path, encoding):
+@pytest.mark.parametrize(
+    "encoding, arrow",
+    [
+        ("utf-8", False),
+        pytest.param("utf-8", True, marks=requires_pyarrow_api),
+        ("cp1252", False),
+        (None, False),
+    ],
+)
+def test_read_csv_encoding(tmp_path, encoding, arrow):
+    """Test reading CSV files with different encodings.
+
+    Arrow only supports utf-8 encoding.
+    """
     # Write csv test file. Depending on the os this will be written in a different
     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
     csv_path = tmp_path / "test.csv"
@@ -105,7 +127,7 @@ def test_read_csv_encoding(tmp_path, encoding):
     # Read csv. The data should be read with the same default encoding as the csv file
     # was written in, but should have been converted to utf-8 in the dataframe returned.
     # Hence, the asserts below, with strings in utf-8, be OK.
-    df = read_dataframe(csv_path, encoding=encoding)
+    df = read_dataframe(csv_path, encoding=encoding, use_arrow=arrow)
 
     assert len(df) == 1
     assert df.columns.tolist() == ["näme", "city"]
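
As context for the change above, the Arrow path only accepts UTF-8 input, while the default path honors `encoding`. A minimal sketch of the API being exercised (the file path is illustrative):

    from pyogrio import read_dataframe

    # cp1252 input needs an explicit encoding and the non-Arrow reader
    df = read_dataframe("data.csv", encoding="cp1252", use_arrow=False)

    # UTF-8 input can be read through either reader
    df = read_dataframe("data.csv", use_arrow=True)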
@@ -117,19 +139,29 @@ def test_read_csv_encoding(tmp_path, encoding):
     locale.getpreferredencoding().upper() == "UTF-8",
     reason="test requires non-UTF-8 default platform",
 )
-def test_read_csv_platform_encoding(tmp_path):
-    """verify that read defaults to platform encoding; only works on Windows (CP1252)"""
+def test_read_csv_platform_encoding(tmp_path, use_arrow):
+    """Verify that read defaults to platform encoding; only works on Windows (CP1252).
+
+    When use_arrow=True, reading a non-UTF8 file fails.
+    """
     csv_path = tmp_path / "test.csv"
     with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
         csv.write("näme,city\n")
         csv.write("Wilhelm Röntgen,Zürich\n")
 
-    df = read_dataframe(csv_path)
+    if use_arrow:
+        with pytest.raises(
+            DataSourceError,
+            match="; please use_arrow=False",
+        ):
+            df = read_dataframe(csv_path, use_arrow=use_arrow)
+    else:
+        df = read_dataframe(csv_path, use_arrow=use_arrow)
 
-    assert len(df) == 1
-    assert df.columns.tolist() == ["näme", "city"]
-    assert df.city.tolist() == ["Zürich"]
-    assert df.näme.tolist() == ["Wilhelm Röntgen"]
+        assert len(df) == 1
+        assert df.columns.tolist() == ["näme", "city"]
+        assert df.city.tolist() == ["Zürich"]
+        assert df.näme.tolist() == ["Wilhelm Röntgen"]
 
 
 def test_read_dataframe(naturalearth_lowres_all_ext):
@@ -227,11 +259,32 @@ def test_read_force_2d(tmp_path, use_arrow):
     assert not df.iloc[0].geometry.has_z
 
 
+@pytest.mark.skipif(
+    not GDAL_GE_352,
+    reason="gdal >= 3.5.2 needed to use OGR_GEOJSON_MAX_OBJ_SIZE with a float value",
+)
+def test_read_geojson_error(naturalearth_lowres_geojson, use_arrow):
+    try:
+        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01})
+        with pytest.raises(
+            DataSourceError,
+            match="Failed to read GeoJSON data; .* GeoJSON object too complex",
+        ):
+            read_dataframe(naturalearth_lowres_geojson, use_arrow=use_arrow)
+    finally:
+        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": None})
+
+
 def test_read_layer(tmp_path, use_arrow):
     filename = tmp_path / "test.gpkg"
 
     # create a multilayer GPKG
     expected1 = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
+    if use_arrow:
+        # TODO this needs to be fixed on the geopandas side (to ensure the
+        # GeoDataFrame() constructor does this), when use_arrow we already
+        # get columns Index with string dtype
+        expected1.columns = expected1.columns.astype("str")
     write_dataframe(
         expected1,
         filename,
@@ -239,6 +292,8 @@ def test_read_layer(tmp_path, use_arrow):
     )
 
     expected2 = gp.GeoDataFrame(geometry=[Point(1, 1)], crs="EPSG:4326")
+    if use_arrow:
+        expected2.columns = expected2.columns.astype("str")
     write_dataframe(expected2, filename, layer="layer2", append=True)
 
     assert np.array_equal(
@@ -361,7 +416,7 @@ def test_read_null_values(tmp_path, use_arrow):
361
416
  df = read_dataframe(filename, use_arrow=use_arrow, read_geometry=False)
362
417
 
363
418
  # make sure that Null values are preserved
364
- assert np.array_equal(df.col.values, expected.col.values)
419
+ assert df["col"].isna().all()
365
420
 
366
421
 
367
422
  def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow):
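
The try/finally pattern in test_read_geojson_error above is the usual way to scope a GDAL config option to one operation. A sketch, assuming only that passing None clears the option again (the read path is illustrative):

    from pyogrio import read_dataframe, set_gdal_config_options

    try:
        # cap the size (in MB) of a single GeoJSON object GDAL will parse
        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01})
        df = read_dataframe("features.geojson")
    finally:
        # None removes the option again
        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": None})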
@@ -438,10 +493,17 @@ def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
     if use_arrow and naturalearth_lowres_all_ext.suffix == ".gpkg":
         # https://github.com/OSGeo/gdal/issues/8492
         request.node.add_marker(pytest.mark.xfail(reason="GDAL doesn't error for GPGK"))
-    with pytest.raises(ValueError, match="Invalid SQL"):
-        read_dataframe(
-            naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
-        )
+
+    if naturalearth_lowres_all_ext.suffix == ".gpkg" and __gdal_version__ >= (3, 11, 0):
+        with pytest.raises(DataLayerError, match="no such column"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+            )
+    else:
+        with pytest.raises(ValueError, match="Invalid SQL"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+            )
 
 
 def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
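
For reference, `where` is an attribute filter; the change above only affects which exception an invalid expression raises (DataLayerError with "no such column" for GPKG on GDAL >= 3.11, ValueError with "Invalid SQL" otherwise). A valid filter, with an illustrative dataset and column:

    from pyogrio import read_dataframe

    df = read_dataframe("naturalearth_lowres.shp", where="continent = 'Africa'")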
@@ -675,6 +737,13 @@ def test_read_skip_features(naturalearth_lowres_all_ext, use_arrow, skip_feature
     # In .geojsonl the vertices are reordered, so normalize
     is_jsons = ext == ".geojsonl"
 
+    if skip_features == 200 and not use_arrow:
+        # result is an empty dataframe, so no proper dtype inference happens
+        # for the numpy object dtype arrays
+        df[["continent", "name", "iso_a3"]] = df[
+            ["continent", "name", "iso_a3"]
+        ].astype("str")
+
     assert_geodataframe_equal(
         df,
         expected,
@@ -690,12 +759,22 @@ def test_read_negative_skip_features(naturalearth_lowres, use_arrow):
         read_dataframe(naturalearth_lowres, skip_features=-1, use_arrow=use_arrow)
 
 
+@pytest.mark.parametrize("skip_features", [0, 10, 200])
 @pytest.mark.parametrize("max_features", [10, 100])
-def test_read_max_features(naturalearth_lowres_all_ext, use_arrow, max_features):
+def test_read_max_features(
+    naturalearth_lowres_all_ext, use_arrow, max_features, skip_features
+):
     ext = naturalearth_lowres_all_ext.suffix
-    expected = read_dataframe(naturalearth_lowres_all_ext).iloc[:max_features]
+    expected = (
+        read_dataframe(naturalearth_lowres_all_ext)
+        .iloc[skip_features : skip_features + max_features]
+        .reset_index(drop=True)
+    )
     df = read_dataframe(
-        naturalearth_lowres_all_ext, max_features=max_features, use_arrow=use_arrow
+        naturalearth_lowres_all_ext,
+        skip_features=skip_features,
+        max_features=max_features,
+        use_arrow=use_arrow,
     )
 
     assert len(df) == len(expected)
@@ -706,6 +785,13 @@ def test_read_max_features(naturalearth_lowres_all_ext, use_arrow, max_features)
     # In .geojsonl the vertices are reordered, so normalize
     is_jsons = ext == ".geojsonl"
 
+    if len(expected) == 0 and not use_arrow:
+        # for pandas >= 3, the column has string dtype but when reading it as
+        # empty result, it gets inferred as object dtype
+        expected["continent"] = expected["continent"].astype("object")
+        expected["name"] = expected["name"].astype("object")
+        expected["iso_a3"] = expected["iso_a3"].astype("object")
+
     assert_geodataframe_equal(
         df,
         expected,
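
skip_features and max_features combine to page through a dataset, and the result always gets a fresh RangeIndex, which is why the expected frame above needs reset_index(drop=True). A sketch with an illustrative path:

    from pyogrio import read_dataframe

    # features 10..19 of the dataset, reindexed from 0
    page = read_dataframe("data.gpkg", skip_features=10, max_features=10)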
@@ -943,9 +1029,20 @@ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
     assert df.iloc[0].geometry.area > area_canada
 
 
-@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
-def test_write_csv_encoding(tmp_path, encoding):
-    """Test if write_dataframe uses the default encoding correctly."""
+@pytest.mark.parametrize(
+    "encoding, arrow",
+    [
+        ("utf-8", False),
+        pytest.param("utf-8", True, marks=requires_arrow_write_api),
+        ("cp1252", False),
+        (None, False),
+    ],
+)
+def test_write_csv_encoding(tmp_path, encoding, arrow):
+    """Test if write_dataframe uses the default encoding correctly.
+
+    Arrow only supports utf-8 encoding.
+    """
     # Write csv test file. Depending on the os this will be written in a different
     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
     csv_path = tmp_path / "test.csv"
@@ -958,7 +1055,7 @@ def test_write_csv_encoding(tmp_path, encoding):
     # same encoding as above.
     df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
     csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
-    write_dataframe(df, csv_pyogrio_path, encoding=encoding)
+    write_dataframe(df, csv_pyogrio_path, encoding=encoding, use_arrow=arrow)
 
     # Check if the text files written both ways can be read again and give same result.
     with open(csv_path, encoding=encoding) as csv:
@@ -976,6 +1073,48 @@ def test_write_csv_encoding(tmp_path, encoding):
     assert csv_bytes == csv_pyogrio_bytes
 
 
+@pytest.mark.parametrize(
+    "ext, fid_column, fid_param_value",
+    [
+        (".gpkg", "fid", None),
+        (".gpkg", "FID", None),
+        (".sqlite", "ogc_fid", None),
+        (".gpkg", "fid_custom", "fid_custom"),
+        (".gpkg", "FID_custom", "fid_custom"),
+        (".sqlite", "ogc_fid_custom", "ogc_fid_custom"),
+    ],
+)
+@pytest.mark.requires_arrow_write_api
+def test_write_custom_fids(tmp_path, ext, fid_column, fid_param_value, use_arrow):
+    """Test to specify FIDs to save when writing to a file.
+
+    Saving custom FIDs is only supported for formats that actually store the FID, like
+    e.g. GPKG and SQLite. The fid_column name check is case-insensitive.
+
+    Typically, GDAL supports using a custom FID column for these file formats via a
+    `FID` layer creation option, which is also tested here. If `fid_param_value` is
+    specified (not None), an `fid` parameter is passed to `write_dataframe`, causing
+    GDAL to use the column name specified for the FID.
+    """
+    input_gdf = gp.GeoDataFrame(
+        {fid_column: [5]}, geometry=[shapely.Point(0, 0)], crs="epsg:4326"
+    )
+    kwargs = {}
+    if fid_param_value is not None:
+        kwargs["fid"] = fid_param_value
+    path = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, path, use_arrow=use_arrow, **kwargs)
+
+    assert path.exists()
+    output_gdf = read_dataframe(path, fid_as_index=True, use_arrow=use_arrow)
+    output_gdf = output_gdf.reset_index()
+
+    # pyogrio always sets "fid" as index name with `fid_as_index`
+    expected_gdf = input_gdf.rename(columns={fid_column: "fid"})
+    assert_geodataframe_equal(output_gdf, expected_gdf)
+
+
 @pytest.mark.parametrize("ext", ALL_EXTS)
 @pytest.mark.requires_arrow_write_api
 def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
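
A round trip of the behavior this test pins down, using the `fid` keyword of write_dataframe that the test passes through (paths are illustrative):

    import geopandas as gp
    from shapely.geometry import Point
    from pyogrio import read_dataframe, write_dataframe

    gdf = gp.GeoDataFrame({"fid_custom": [5]}, geometry=[Point(0, 0)], crs="EPSG:4326")

    # GPKG stores FIDs natively; point GDAL at the column holding them
    write_dataframe(gdf, "data.gpkg", fid="fid_custom")

    # the stored FIDs come back as the index, always named "fid"
    result = read_dataframe("data.gpkg", fid_as_index=True)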
@@ -1087,16 +1226,38 @@ def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):
 
 
 @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
+@pytest.mark.parametrize(
+    "columns, dtype",
+    [
+        ([], None),
+        (["col_int"], np.int64),
+        (["col_float"], np.float64),
+        (["col_object"], object),
+    ],
+)
 @pytest.mark.requires_arrow_write_api
-def test_write_empty_dataframe(tmp_path, ext, use_arrow):
-    expected = gp.GeoDataFrame(geometry=[], crs=4326)
+def test_write_empty_dataframe(tmp_path, ext, columns, dtype, use_arrow):
+    """Test writing dataframe with no rows.
 
+    With use_arrow, object type columns with no rows are converted to null type columns
+    by pyarrow, but null columns are not supported by GDAL. Added to test fix for #513.
+    """
+    expected = gp.GeoDataFrame(geometry=[], columns=columns, dtype=dtype, crs=4326)
     filename = tmp_path / f"test{ext}"
     write_dataframe(expected, filename, use_arrow=use_arrow)
 
     assert filename.exists()
-    df = read_dataframe(filename)
-    assert_geodataframe_equal(df, expected)
+    df = read_dataframe(filename, use_arrow=use_arrow)
+
+    # Check result
+    # For older pandas versions, the index is created as Object dtype but read as
+    # RangeIndex, so don't check the index dtype in that case.
+    check_index_type = True if PANDAS_GE_20 else False
+    # with pandas 3+ and reading through arrow, we preserve the string dtype
+    # (no proper dtype inference happens for the empty numpy object dtype arrays)
+    if use_arrow and dtype is object:
+        expected["col_object"] = expected["col_object"].astype("str")
+    assert_geodataframe_equal(df, expected, check_index_type=check_index_type)
 
 
 def test_write_empty_geometry(tmp_path):
@@ -1116,6 +1277,28 @@ def test_write_empty_geometry(tmp_path):
     assert_geodataframe_equal(df, expected)
 
 
+@pytest.mark.requires_arrow_write_api
+def test_write_None_string_column(tmp_path, use_arrow):
+    """Test pandas object columns with all None values.
+
+    With use_arrow, such columns are converted to null type columns by pyarrow, but null
+    columns are not supported by GDAL. Added to test fix for #513.
+    """
+    gdf = gp.GeoDataFrame({"object_col": [None]}, geometry=[Point(0, 0)], crs=4326)
+    filename = tmp_path / "test.gpkg"
+
+    write_dataframe(gdf, filename, use_arrow=use_arrow)
+    assert filename.exists()
+
+    result_gdf = read_dataframe(filename, use_arrow=use_arrow)
+    if PANDAS_GE_30 and use_arrow:
+        assert result_gdf.object_col.dtype == "str"
+        gdf["object_col"] = gdf["object_col"].astype("str")
+    else:
+        assert result_gdf.object_col.dtype == object
+    assert_geodataframe_equal(result_gdf, gdf)
+
+
 @pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
 @pytest.mark.requires_arrow_write_api
 def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
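
The pyarrow behavior both docstrings refer to is easy to reproduce: an all-None object column gives pyarrow nothing to infer a type from, so it becomes a null-typed column, which GDAL cannot write. A sketch:

    import pandas as pd
    import pyarrow as pa

    table = pa.Table.from_pandas(pd.DataFrame({"object_col": [None]}))
    print(table.schema)  # object_col: null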
@@ -1521,6 +1704,30 @@ def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
     assert df.crs.equals(expected.crs)
 
 
+@pytest.mark.parametrize("ext", [".gpkg.zip", ".shp.zip", ".shz"])
+@pytest.mark.requires_arrow_write_api
+def test_write_read_zipped_ext(tmp_path, naturalearth_lowres, ext, use_arrow):
+    """Run a basic read and write test on some extra (zipped) extensions."""
+    if ext == ".gpkg.zip" and not GDAL_GE_37:
+        pytest.skip(".gpkg.zip support requires GDAL >= 3.7")
+
+    input_gdf = read_dataframe(naturalearth_lowres)
+    output_path = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
+
+    assert output_path.exists()
+    result_gdf = read_dataframe(output_path)
+
+    geometry_types = result_gdf.geometry.type.unique()
+    if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
+        assert list(geometry_types) == ["MultiPolygon"]
+    else:
+        assert set(geometry_types) == {"MultiPolygon", "Polygon"}
+
+    assert_geodataframe_equal(result_gdf, input_gdf, check_index_type=False)
+
+
 def test_write_read_mixed_column_values(tmp_path):
     # use_arrow=True is tested separately below
     mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
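
A basic round trip over the newly covered zipped extensions, per the test above (paths are illustrative; .gpkg.zip additionally needs GDAL >= 3.7):

    from pyogrio import read_dataframe, write_dataframe

    gdf = read_dataframe("naturalearth_lowres.shp")
    write_dataframe(gdf, "out.shp.zip")  # zipped shapefile; .shz and .gpkg.zip work too
    roundtripped = read_dataframe("out.shp.zip")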
@@ -1532,11 +1739,13 @@ def test_write_read_mixed_column_values(tmp_path):
     write_dataframe(test_gdf, output_path)
     output_gdf = read_dataframe(output_path)
     assert len(test_gdf) == len(output_gdf)
-    for idx, value in enumerate(mixed_values):
-        if value in (None, np.nan):
-            assert output_gdf["mixed"][idx] is None
-        else:
-            assert output_gdf["mixed"][idx] == str(value)
+    # mixed values as object dtype are currently written as strings
+    # (but preserving nulls)
+    expected = pd.Series(
+        [str(value) if value not in (None, np.nan) else None for value in mixed_values],
+        name="mixed",
+    )
+    assert_series_equal(output_gdf["mixed"], expected)
 
 
 @requires_arrow_write_api
@@ -1569,8 +1778,8 @@ def test_write_read_null(tmp_path, use_arrow):
     assert pd.isna(result_gdf["float64"][1])
     assert pd.isna(result_gdf["float64"][2])
     assert result_gdf["object_str"][0] == "test"
-    assert result_gdf["object_str"][1] is None
-    assert result_gdf["object_str"][2] is None
+    assert pd.isna(result_gdf["object_str"][1])
+    assert pd.isna(result_gdf["object_str"][2])
 
 
 @pytest.mark.requires_arrow_write_api
@@ -1714,23 +1923,29 @@ def test_write_geometry_z_types_auto(
 
 
 @pytest.mark.parametrize(
-    "on_invalid, message",
+    "on_invalid, message, expected_wkt",
     [
         (
             "warn",
             "Invalid WKB: geometry is returned as None. IllegalArgumentException: "
-            "Invalid number of points in LinearRing found 2 - must be 0 or >=",
+            "Points of LinearRing do not form a closed linestring",
+            None,
         ),
-        ("raise", "Invalid number of points in LinearRing found 2 - must be 0 or >="),
-        ("ignore", None),
+        ("raise", "Points of LinearRing do not form a closed linestring", None),
+        ("ignore", None, None),
+        ("fix", None, "POLYGON ((0 0, 0 1, 0 0))"),
     ],
 )
-def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
+@pytest.mark.filterwarnings("ignore:Non closed ring detected:RuntimeWarning")
+def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message, expected_wkt):
+    if on_invalid == "fix" and not SHAPELY_GE_21:
+        pytest.skip("on_invalid=fix not available for Shapely < 2.1")
+
     if on_invalid == "raise":
         handler = pytest.raises(shapely.errors.GEOSException, match=message)
     elif on_invalid == "warn":
         handler = pytest.warns(match=message)
-    elif on_invalid == "ignore":
+    elif on_invalid in ("fix", "ignore"):
         handler = contextlib.nullcontext()
     else:
         raise ValueError(f"unknown value for on_invalid: {on_invalid}")
@@ -1744,7 +1959,7 @@ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
             "properties": {},
             "geometry": {
                 "type": "Polygon",
-                "coordinates": [ [ [0, 0], [0, 0] ] ]
+                "coordinates": [ [ [0, 0], [0, 1] ] ]
             }
         }
     ]
@@ -1760,7 +1975,10 @@ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
             use_arrow=use_arrow,
             on_invalid=on_invalid,
         )
-        df.geometry.isnull().all()
+        if expected_wkt is None:
+            assert df.geometry.iloc[0] is None
+        else:
+            assert df.geometry.iloc[0].wkt == expected_wkt
 
 
 def test_read_multisurface(multisurface_file, use_arrow):
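
The new "fix" mode repairs the geometry instead of returning None or raising; a sketch assuming Shapely >= 2.1 and a GeoJSON file containing the unclosed ring from the test:

    from pyogrio import read_dataframe

    df = read_dataframe("unclosed_ring.geojson", on_invalid="fix")
    print(df.geometry.iloc[0].wkt)  # POLYGON ((0 0, 0 1, 0 0))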
@@ -1792,6 +2010,10 @@ def test_read_dataset_kwargs(nested_geojson_file, use_arrow):
         geometry=[shapely.Point(0, 0)],
         crs="EPSG:4326",
     )
+    if GDAL_GE_311 and use_arrow:
+        # GDAL 3.11 started to use json extension type, which is not yet handled
+        # correctly in the arrow->pandas conversion (using object instead of str dtype)
+        expected["intermediate_level"] = expected["intermediate_level"].astype(object)
 
     assert_geodataframe_equal(df, expected)
@@ -1837,7 +2059,7 @@ def test_write_nullable_dtypes(tmp_path, use_arrow):
     expected["col2"] = expected["col2"].astype("float64")
     expected["col3"] = expected["col3"].astype("float32")
     expected["col4"] = expected["col4"].astype("float64")
-    expected["col5"] = expected["col5"].astype(object)
+    expected["col5"] = expected["col5"].astype("str")
     expected.loc[1, "col5"] = None  # pandas converts to pd.NA on line above
     assert_geodataframe_equal(output_gdf, expected)
@@ -2160,7 +2382,10 @@ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):
 
     if use_arrow:
         # pyarrow cannot decode column name with incorrect encoding
-        with pytest.raises(UnicodeDecodeError):
+        with pytest.raises(
+            DataSourceError,
+            match="The file being read is not encoded in UTF-8; please use_arrow=False",
+        ):
             read_dataframe(output_path, use_arrow=True)
     else:
         bad = read_dataframe(output_path, use_arrow=False)
@@ -2257,7 +2482,7 @@ def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
     if "LIBKML" in list_drivers():
         # test appending to the existing file only if LIBKML is available
         # as it appears to fall back on LIBKML driver when appending.
-        points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
+        points_append = [Point(7, 8), Point(9, 10), Point(11, 12)]
         gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
 
         write_dataframe(
pyogrio/tests/test_path.py CHANGED
@@ -33,10 +33,20 @@ def change_cwd(path):
     [
         # local file paths that should be passed through as is
        ("data.gpkg", "data.gpkg"),
+        ("data.gpkg.zip", "data.gpkg.zip"),
+        ("data.shp.zip", "data.shp.zip"),
         (Path("data.gpkg"), "data.gpkg"),
+        (Path("data.gpkg.zip"), "data.gpkg.zip"),
+        (Path("data.shp.zip"), "data.shp.zip"),
         ("/home/user/data.gpkg", "/home/user/data.gpkg"),
+        ("/home/user/data.gpkg.zip", "/home/user/data.gpkg.zip"),
+        ("/home/user/data.shp.zip", "/home/user/data.shp.zip"),
         (r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"),
+        (r"C:\User\Documents\data.gpkg.zip", r"C:\User\Documents\data.gpkg.zip"),
+        (r"C:\User\Documents\data.shp.zip", r"C:\User\Documents\data.shp.zip"),
         ("file:///home/user/data.gpkg", "/home/user/data.gpkg"),
+        ("file:///home/user/data.gpkg.zip", "/home/user/data.gpkg.zip"),
+        ("file:///home/user/data.shp.zip", "/home/user/data.shp.zip"),
         ("/home/folder # with hash/data.gpkg", "/home/folder # with hash/data.gpkg"),
         # cloud URIs
         ("https://testing/data.gpkg", "/vsicurl/https://testing/data.gpkg"),
pyogrio/tests/test_raw_io.py CHANGED
@@ -17,7 +17,7 @@ from pyogrio import (
     read_info,
     set_gdal_config_options,
 )
-from pyogrio._compat import HAS_PYARROW, HAS_SHAPELY
+from pyogrio._compat import GDAL_GE_37, HAS_PYARROW, HAS_SHAPELY
 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError
 from pyogrio.raw import open_arrow, read, write
 from pyogrio.tests.conftest import (
@@ -63,9 +63,10 @@ def test_read(naturalearth_lowres):
 @pytest.mark.parametrize("ext", DRIVERS)
 def test_read_autodetect_driver(tmp_path, naturalearth_lowres, ext):
     # Test all supported autodetect drivers
+    if ext == ".gpkg.zip" and not GDAL_GE_37:
+        pytest.skip(".gpkg.zip not supported for gdal < 3.7.0")
     testfile = prepare_testfile(naturalearth_lowres, dst_dir=tmp_path, ext=ext)
 
-    assert testfile.suffix == ext
     assert testfile.exists()
     meta, _, geometry, fields = read(testfile)
 
@@ -703,6 +704,9 @@ def test_write_append(tmp_path, naturalearth_lowres, ext):
     if ext in (".geojsonl", ".geojsons") and __gdal_version__ < (3, 6, 0):
         pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")
 
+    if ext == ".gpkg.zip":
+        pytest.skip("Append to .gpkg.zip is not supported")
+
     meta, _, geometry, field_data = read(naturalearth_lowres)
 
     # coerce output layer to MultiPolygon to avoid mixed type errors
pyogrio/util.py CHANGED
@@ -9,6 +9,8 @@ from urllib.parse import urlparse
 
 from pyogrio._vsi import vsimem_rmtree_toplevel as _vsimem_rmtree_toplevel
 
+MULTI_EXTENSIONS = (".gpkg.zip", ".shp.zip")
+
 
 def get_vsi_path_or_buffer(path_or_buffer):
     """Get VSI-prefixed path or bytes buffer depending on type of path_or_buffer.
@@ -68,15 +70,23 @@ def vsi_path(path: Union[str, Path]) -> str:
     # Windows drive letters (e.g. "C:\") confuse `urlparse` as they look like
     # URL schemes
     if sys.platform == "win32" and re.match("^[a-zA-Z]\\:", path):
+        # If it is not a zip file or it is a multi-extension zip file that is directly
+        # supported by a GDAL driver, return the path as is.
         if not path.split("!")[0].endswith(".zip"):
             return path
+        if path.split("!")[0].endswith(MULTI_EXTENSIONS):
+            return path
 
         # prefix then allow to proceed with remaining parsing
         path = f"zip://{path}"
 
     path, archive, scheme = _parse_uri(path)
 
-    if scheme or archive or path.endswith(".zip"):
+    if (
+        scheme
+        or archive
+        or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS))
+    ):
         return _construct_vsi_path(path, archive, scheme)
 
     return path
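
The net effect on path handling, per the passthrough cases added in test_path.py (the /vsizip/ prefix for plain archives is pyogrio's existing behavior):

    from pyogrio.util import vsi_path

    # multi-extension zips that GDAL drivers open directly are passed through
    assert vsi_path("data.gpkg.zip") == "data.gpkg.zip"
    assert vsi_path("data.shp.zip") == "data.shp.zip"

    # other zip archives still get GDAL's /vsizip/ prefix
    print(vsi_path("data.zip"))  # e.g. /vsizip/data.zip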
@@ -146,7 +156,10 @@ def _construct_vsi_path(path, archive, scheme) -> str:
     suffix = ""
     schemes = scheme.split("+")
 
-    if "zip" not in schemes and (archive.endswith(".zip") or path.endswith(".zip")):
+    if "zip" not in schemes and (
+        archive.endswith(".zip")
+        or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS))
+    ):
         schemes.insert(0, "zip")
 
     if schemes: