pyogrio-0.10.0-cp312-cp312-manylinux_2_28_aarch64.whl → pyogrio-0.11.0-cp312-cp312-manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyogrio/__init__.py +9 -9
- pyogrio/_compat.py +8 -0
- pyogrio/_err.cpython-312-aarch64-linux-gnu.so +0 -0
- pyogrio/_geometry.cpython-312-aarch64-linux-gnu.so +0 -0
- pyogrio/_io.cpython-312-aarch64-linux-gnu.so +0 -0
- pyogrio/_ogr.cpython-312-aarch64-linux-gnu.so +0 -0
- pyogrio/_version.py +3 -3
- pyogrio/_vsi.cpython-312-aarch64-linux-gnu.so +0 -0
- pyogrio/gdal_data/GDAL-targets-release.cmake +3 -3
- pyogrio/gdal_data/GDAL-targets.cmake +2 -2
- pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
- pyogrio/gdal_data/gdalinfo_output.schema.json +3 -3
- pyogrio/gdal_data/gdaltileindex.xsd +1 -17
- pyogrio/gdal_data/gdalvrt.xsd +48 -41
- pyogrio/gdal_data/nitf_spec.xml +1 -17
- pyogrio/gdal_data/nitf_spec.xsd +1 -17
- pyogrio/gdal_data/ogrvrt.xsd +1 -17
- pyogrio/gdal_data/osmconf.ini +3 -1
- pyogrio/gdal_data/pdfcomposition.xsd +1 -17
- pyogrio/gdal_data/template_tiles.mapml +28 -0
- pyogrio/gdal_data/vcpkg.spdx.json +32 -27
- pyogrio/gdal_data/vcpkg_abi_info.txt +27 -26
- pyogrio/gdal_data/vdv452.xml +1 -17
- pyogrio/gdal_data/vdv452.xsd +1 -17
- pyogrio/geopandas.py +91 -43
- pyogrio/proj_data/ITRF2014 +1 -1
- pyogrio/proj_data/ITRF2020 +91 -0
- pyogrio/proj_data/proj-config-version.cmake +3 -3
- pyogrio/proj_data/proj-config.cmake +1 -1
- pyogrio/proj_data/proj-targets.cmake +3 -3
- pyogrio/proj_data/proj.db +0 -0
- pyogrio/proj_data/proj.ini +11 -3
- pyogrio/proj_data/proj4-targets.cmake +3 -3
- pyogrio/proj_data/usage +7 -2
- pyogrio/proj_data/vcpkg.spdx.json +27 -22
- pyogrio/proj_data/vcpkg_abi_info.txt +15 -14
- pyogrio/tests/conftest.py +8 -0
- pyogrio/tests/test_arrow.py +3 -0
- pyogrio/tests/test_core.py +8 -4
- pyogrio/tests/test_geopandas_io.py +250 -42
- pyogrio/tests/test_path.py +10 -0
- pyogrio/tests/test_raw_io.py +6 -2
- pyogrio/util.py +15 -2
- {pyogrio-0.10.0.dist-info → pyogrio-0.11.0.dist-info}/METADATA +32 -37
- {pyogrio-0.10.0.dist-info → pyogrio-0.11.0.dist-info}/RECORD +202 -200
- {pyogrio-0.10.0.dist-info → pyogrio-0.11.0.dist-info}/WHEEL +1 -1
- pyogrio.libs/{libgdal-b0847c7b.so.35.3.9.1 → libgdal-4bc0d15f.so.36.3.10.3} +0 -0
- {pyogrio-0.10.0.dist-info → pyogrio-0.11.0.dist-info/licenses}/LICENSE +0 -0
- {pyogrio-0.10.0.dist-info → pyogrio-0.11.0.dist-info}/top_level.txt +0 -0
pyogrio/tests/test_geopandas_io.py
CHANGED

@@ -12,10 +12,20 @@ from pyogrio import (
     list_drivers,
     list_layers,
     read_info,
+    set_gdal_config_options,
     vsi_listtree,
     vsi_unlink,
 )
-from pyogrio._compat import
+from pyogrio._compat import (
+    GDAL_GE_37,
+    GDAL_GE_311,
+    GDAL_GE_352,
+    HAS_ARROW_WRITE_API,
+    HAS_PYPROJ,
+    PANDAS_GE_15,
+    PANDAS_GE_30,
+    SHAPELY_GE_21,
+)
 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
 from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe
 from pyogrio.raw import (
@@ -93,8 +103,20 @@ def spatialite_available(path):
         return False


-@pytest.mark.parametrize(
-
+@pytest.mark.parametrize(
+    "encoding, arrow",
+    [
+        ("utf-8", False),
+        pytest.param("utf-8", True, marks=requires_pyarrow_api),
+        ("cp1252", False),
+        (None, False),
+    ],
+)
+def test_read_csv_encoding(tmp_path, encoding, arrow):
+    """Test reading CSV files with different encodings.
+
+    Arrow only supports utf-8 encoding.
+    """
     # Write csv test file. Depending on the os this will be written in a different
     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
     csv_path = tmp_path / "test.csv"
@@ -105,7 +127,7 @@ def test_read_csv_encoding(tmp_path, encoding):
     # Read csv. The data should be read with the same default encoding as the csv file
     # was written in, but should have been converted to utf-8 in the dataframe returned.
     # Hence, the asserts below, with strings in utf-8, should be OK.
-    df = read_dataframe(csv_path, encoding=encoding)
+    df = read_dataframe(csv_path, encoding=encoding, use_arrow=arrow)

     assert len(df) == 1
     assert df.columns.tolist() == ["näme", "city"]
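The hunk above extends the encoding test to the Arrow read path. For reference, a minimal sketch of the behaviour it exercises (the file name is illustrative; `read_dataframe` is pyogrio's public API):

    from pyogrio import read_dataframe

    # Decode a cp1252-encoded CSV; values come back as utf-8 strings.
    df = read_dataframe("test.csv", encoding="cp1252")

    # The Arrow path only supports utf-8-encoded sources.
    df = read_dataframe("test.csv", encoding="utf-8", use_arrow=True)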
@@ -117,19 +139,29 @@ def test_read_csv_encoding(tmp_path, encoding):
     locale.getpreferredencoding().upper() == "UTF-8",
     reason="test requires non-UTF-8 default platform",
 )
-def test_read_csv_platform_encoding(tmp_path):
-    """
+def test_read_csv_platform_encoding(tmp_path, use_arrow):
+    """Verify that read defaults to platform encoding; only works on Windows (CP1252).
+
+    When use_arrow=True, reading a non-UTF8 file fails.
+    """
     csv_path = tmp_path / "test.csv"
     with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
         csv.write("näme,city\n")
         csv.write("Wilhelm Röntgen,Zürich\n")

-
+    if use_arrow:
+        with pytest.raises(
+            DataSourceError,
+            match="; please use_arrow=False",
+        ):
+            df = read_dataframe(csv_path, use_arrow=use_arrow)
+    else:
+        df = read_dataframe(csv_path, use_arrow=use_arrow)

-
-
-
-
+        assert len(df) == 1
+        assert df.columns.tolist() == ["näme", "city"]
+        assert df.city.tolist() == ["Zürich"]
+        assert df.näme.tolist() == ["Wilhelm Röntgen"]


 def test_read_dataframe(naturalearth_lowres_all_ext):
@@ -227,11 +259,32 @@ def test_read_force_2d(tmp_path, use_arrow):
     assert not df.iloc[0].geometry.has_z


+@pytest.mark.skipif(
+    not GDAL_GE_352,
+    reason="gdal >= 3.5.2 needed to use OGR_GEOJSON_MAX_OBJ_SIZE with a float value",
+)
+def test_read_geojson_error(naturalearth_lowres_geojson, use_arrow):
+    try:
+        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01})
+        with pytest.raises(
+            DataSourceError,
+            match="Failed to read GeoJSON data; .* GeoJSON object too complex",
+        ):
+            read_dataframe(naturalearth_lowres_geojson, use_arrow=use_arrow)
+    finally:
+        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": None})
+
+
 def test_read_layer(tmp_path, use_arrow):
     filename = tmp_path / "test.gpkg"

     # create a multilayer GPKG
     expected1 = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
+    if use_arrow:
+        # TODO this needs to be fixed on the geopandas side (to ensure the
+        # GeoDataFrame() constructor does this), when use_arrow we already
+        # get columns Index with string dtype
+        expected1.columns = expected1.columns.astype("str")
     write_dataframe(
         expected1,
         filename,
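The new test_read_geojson_error drives GDAL's OGR_GEOJSON_MAX_OBJ_SIZE option through set_gdal_config_options. A minimal sketch of that pattern (file name illustrative; per GDAL's docs the value is a size in MB, and a float value needs GDAL >= 3.5.2):

    from pyogrio import read_dataframe, set_gdal_config_options

    set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01})
    try:
        # Raises DataSourceError if a single GeoJSON object exceeds the limit.
        df = read_dataframe("large.geojson")
    finally:
        # Passing None resets the option to its default.
        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": None})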
@@ -239,6 +292,8 @@ def test_read_layer(tmp_path, use_arrow):
     )

     expected2 = gp.GeoDataFrame(geometry=[Point(1, 1)], crs="EPSG:4326")
+    if use_arrow:
+        expected2.columns = expected2.columns.astype("str")
     write_dataframe(expected2, filename, layer="layer2", append=True)

     assert np.array_equal(
@@ -361,7 +416,7 @@ def test_read_null_values(tmp_path, use_arrow):
     df = read_dataframe(filename, use_arrow=use_arrow, read_geometry=False)

     # make sure that Null values are preserved
-    assert
+    assert df["col"].isna().all()


 def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow):
@@ -438,10 +493,17 @@ def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
     if use_arrow and naturalearth_lowres_all_ext.suffix == ".gpkg":
         # https://github.com/OSGeo/gdal/issues/8492
         request.node.add_marker(pytest.mark.xfail(reason="GDAL doesn't error for GPKG"))
-
-
-
-
+
+    if naturalearth_lowres_all_ext.suffix == ".gpkg" and __gdal_version__ >= (3, 11, 0):
+        with pytest.raises(DataLayerError, match="no such column"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+            )
+    else:
+        with pytest.raises(ValueError, match="Invalid SQL"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+            )


 def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
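The hunk above pins down the error raised for an invalid `where` filter: GDAL >= 3.11 surfaces a DataLayerError ("no such column") for GPKG, while older versions raise ValueError ("Invalid SQL"). For contrast, a valid attribute filter looks like this (a sketch; the column name assumes the Natural Earth data these tests use):

    from pyogrio import read_dataframe

    df = read_dataframe("naturalearth_lowres.shp", where="pop_est > 10000000")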
@@ -675,6 +737,13 @@ def test_read_skip_features(naturalearth_lowres_all_ext, use_arrow, skip_features):
     # In .geojsonl the vertices are reordered, so normalize
     is_jsons = ext == ".geojsonl"

+    if skip_features == 200 and not use_arrow:
+        # result is an empty dataframe, so no proper dtype inference happens
+        # for the numpy object dtype arrays
+        df[["continent", "name", "iso_a3"]] = df[
+            ["continent", "name", "iso_a3"]
+        ].astype("str")
+
     assert_geodataframe_equal(
         df,
         expected,
@@ -943,9 +1012,20 @@ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
     assert df.iloc[0].geometry.area > area_canada


-@pytest.mark.parametrize(
-
-
+@pytest.mark.parametrize(
+    "encoding, arrow",
+    [
+        ("utf-8", False),
+        pytest.param("utf-8", True, marks=requires_arrow_write_api),
+        ("cp1252", False),
+        (None, False),
+    ],
+)
+def test_write_csv_encoding(tmp_path, encoding, arrow):
+    """Test if write_dataframe uses the default encoding correctly.
+
+    Arrow only supports utf-8 encoding.
+    """
     # Write csv test file. Depending on the os this will be written in a different
     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
     csv_path = tmp_path / "test.csv"
@@ -958,7 +1038,7 @@ def test_write_csv_encoding(tmp_path, encoding):
     # same encoding as above.
     df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
     csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
-    write_dataframe(df, csv_pyogrio_path, encoding=encoding)
+    write_dataframe(df, csv_pyogrio_path, encoding=encoding, use_arrow=arrow)

     # Check if the text files written both ways can be read again and give same result.
     with open(csv_path, encoding=encoding) as csv:
@@ -976,6 +1056,48 @@ def test_write_csv_encoding(tmp_path, encoding):
     assert csv_bytes == csv_pyogrio_bytes


+@pytest.mark.parametrize(
+    "ext, fid_column, fid_param_value",
+    [
+        (".gpkg", "fid", None),
+        (".gpkg", "FID", None),
+        (".sqlite", "ogc_fid", None),
+        (".gpkg", "fid_custom", "fid_custom"),
+        (".gpkg", "FID_custom", "fid_custom"),
+        (".sqlite", "ogc_fid_custom", "ogc_fid_custom"),
+    ],
+)
+@pytest.mark.requires_arrow_write_api
+def test_write_custom_fids(tmp_path, ext, fid_column, fid_param_value, use_arrow):
+    """Test to specify FIDs to save when writing to a file.
+
+    Saving custom FIDs is only supported for formats that actually store the FID, like
+    e.g. GPKG and SQLite. The fid_column name check is case-insensitive.
+
+    Typically, GDAL supports using a custom FID column for these file formats via a
+    `FID` layer creation option, which is also tested here. If `fid_param_value` is
+    specified (not None), an `fid` parameter is passed to `write_dataframe`, causing
+    GDAL to use the column name specified for the FID.
+    """
+    input_gdf = gp.GeoDataFrame(
+        {fid_column: [5]}, geometry=[shapely.Point(0, 0)], crs="epsg:4326"
+    )
+    kwargs = {}
+    if fid_param_value is not None:
+        kwargs["fid"] = fid_param_value
+    path = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, path, use_arrow=use_arrow, **kwargs)
+
+    assert path.exists()
+    output_gdf = read_dataframe(path, fid_as_index=True, use_arrow=use_arrow)
+    output_gdf = output_gdf.reset_index()
+
+    # pyogrio always sets "fid" as index name with `fid_as_index`
+    expected_gdf = input_gdf.rename(columns={fid_column: "fid"})
+    assert_geodataframe_equal(output_gdf, expected_gdf)
+
+
 @pytest.mark.parametrize("ext", ALL_EXTS)
 @pytest.mark.requires_arrow_write_api
 def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
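The new test_write_custom_fids covers the `fid` keyword of write_dataframe, which maps a dataframe column onto GDAL's FID layer creation option. A minimal usage sketch based on the test above (paths illustrative):

    import geopandas as gp
    from shapely import Point
    from pyogrio import read_dataframe, write_dataframe

    gdf = gp.GeoDataFrame({"fid_custom": [5]}, geometry=[Point(0, 0)], crs="epsg:4326")
    write_dataframe(gdf, "test.gpkg", fid="fid_custom")

    # The stored FID comes back as the index, always named "fid".
    result = read_dataframe("test.gpkg", fid_as_index=True)
    assert result.index.tolist() == [5]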
@@ -1087,16 +1209,38 @@ def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):


 @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
+@pytest.mark.parametrize(
+    "columns, dtype",
+    [
+        ([], None),
+        (["col_int"], np.int64),
+        (["col_float"], np.float64),
+        (["col_object"], object),
+    ],
+)
 @pytest.mark.requires_arrow_write_api
-def test_write_empty_dataframe(tmp_path, ext, use_arrow):
-
+def test_write_empty_dataframe(tmp_path, ext, columns, dtype, use_arrow):
+    """Test writing dataframe with no rows.

+    With use_arrow, object type columns with no rows are converted to null type columns
+    by pyarrow, but null columns are not supported by GDAL. Added to test fix for #513.
+    """
+    expected = gp.GeoDataFrame(geometry=[], columns=columns, dtype=dtype, crs=4326)
     filename = tmp_path / f"test{ext}"
     write_dataframe(expected, filename, use_arrow=use_arrow)

     assert filename.exists()
-    df = read_dataframe(filename)
-
+    df = read_dataframe(filename, use_arrow=use_arrow)
+
+    # Check result
+    # For older pandas versions, the index is created as Object dtype but read as
+    # RangeIndex, so don't check the index dtype in that case.
+    check_index_type = True if PANDAS_GE_20 else False
+    # with pandas 3+ and reading through arrow, we preserve the string dtype
+    # (no proper dtype inference happens for the empty numpy object dtype arrays)
+    if use_arrow and dtype is object:
+        expected["col_object"] = expected["col_object"].astype("str")
+    assert_geodataframe_equal(df, expected, check_index_type=check_index_type)


 def test_write_empty_geometry(tmp_path):
@@ -1116,6 +1260,28 @@ def test_write_empty_geometry(tmp_path):
     assert_geodataframe_equal(df, expected)


+@pytest.mark.requires_arrow_write_api
+def test_write_None_string_column(tmp_path, use_arrow):
+    """Test pandas object columns with all None values.
+
+    With use_arrow, such columns are converted to null type columns by pyarrow, but null
+    columns are not supported by GDAL. Added to test fix for #513.
+    """
+    gdf = gp.GeoDataFrame({"object_col": [None]}, geometry=[Point(0, 0)], crs=4326)
+    filename = tmp_path / "test.gpkg"
+
+    write_dataframe(gdf, filename, use_arrow=use_arrow)
+    assert filename.exists()
+
+    result_gdf = read_dataframe(filename, use_arrow=use_arrow)
+    if PANDAS_GE_30 and use_arrow:
+        assert result_gdf.object_col.dtype == "str"
+        gdf["object_col"] = gdf["object_col"].astype("str")
+    else:
+        assert result_gdf.object_col.dtype == object
+    assert_geodataframe_equal(result_gdf, gdf)
+
+
 @pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
 @pytest.mark.requires_arrow_write_api
 def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
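test_write_None_string_column guards the fix for #513: pyarrow turns an all-None object column into a null-typed column, which GDAL cannot store, so pyogrio now converts such columns before writing. The case it protects against, as a sketch (path illustrative):

    import geopandas as gp
    from shapely import Point
    from pyogrio import write_dataframe

    gdf = gp.GeoDataFrame({"object_col": [None]}, geometry=[Point(0, 0)], crs=4326)
    write_dataframe(gdf, "test.gpkg", use_arrow=True)  # worked around since #513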
@@ -1521,6 +1687,30 @@ def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
     assert df.crs.equals(expected.crs)


+@pytest.mark.parametrize("ext", [".gpkg.zip", ".shp.zip", ".shz"])
+@pytest.mark.requires_arrow_write_api
+def test_write_read_zipped_ext(tmp_path, naturalearth_lowres, ext, use_arrow):
+    """Run a basic read and write test on some extra (zipped) extensions."""
+    if ext == ".gpkg.zip" and not GDAL_GE_37:
+        pytest.skip(".gpkg.zip support requires GDAL >= 3.7")
+
+    input_gdf = read_dataframe(naturalearth_lowres)
+    output_path = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
+
+    assert output_path.exists()
+    result_gdf = read_dataframe(output_path)
+
+    geometry_types = result_gdf.geometry.type.unique()
+    if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
+        assert list(geometry_types) == ["MultiPolygon"]
+    else:
+        assert set(geometry_types) == {"MultiPolygon", "Polygon"}
+
+    assert_geodataframe_equal(result_gdf, input_gdf, check_index_type=False)
+
+
 def test_write_read_mixed_column_values(tmp_path):
     # use_arrow=True is tested separately below
     mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
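test_write_read_zipped_ext above exercises the new zipped-extension support end to end. A round-trip sketch (file names illustrative; .gpkg.zip needs GDAL >= 3.7):

    from pyogrio import read_dataframe, write_dataframe

    gdf = read_dataframe("naturalearth_lowres.shp")
    write_dataframe(gdf, "test.gpkg.zip")
    roundtripped = read_dataframe("test.gpkg.zip")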
@@ -1532,11 +1722,13 @@ def test_write_read_mixed_column_values(tmp_path):
     write_dataframe(test_gdf, output_path)
     output_gdf = read_dataframe(output_path)
     assert len(test_gdf) == len(output_gdf)
-
-
-
-    else
-
+    # mixed values as object dtype are currently written as strings
+    # (but preserving nulls)
+    expected = pd.Series(
+        [str(value) if value not in (None, np.nan) else None for value in mixed_values],
+        name="mixed",
+    )
+    assert_series_equal(output_gdf["mixed"], expected)


 @requires_arrow_write_api
@@ -1569,8 +1761,8 @@ def test_write_read_null(tmp_path, use_arrow):
     assert pd.isna(result_gdf["float64"][1])
     assert pd.isna(result_gdf["float64"][2])
     assert result_gdf["object_str"][0] == "test"
-    assert result_gdf["object_str"][1]
-    assert result_gdf["object_str"][2]
+    assert pd.isna(result_gdf["object_str"][1])
+    assert pd.isna(result_gdf["object_str"][2])


 @pytest.mark.requires_arrow_write_api
@@ -1714,23 +1906,29 @@ def test_write_geometry_z_types_auto(


 @pytest.mark.parametrize(
-    "on_invalid, message",
+    "on_invalid, message, expected_wkt",
     [
         (
             "warn",
             "Invalid WKB: geometry is returned as None. IllegalArgumentException: "
-            "
+            "Points of LinearRing do not form a closed linestring",
+            None,
         ),
-        ("raise", "
-        ("ignore", None),
+        ("raise", "Points of LinearRing do not form a closed linestring", None),
+        ("ignore", None, None),
+        ("fix", None, "POLYGON ((0 0, 0 1, 0 0))"),
     ],
 )
-
+@pytest.mark.filterwarnings("ignore:Non closed ring detected:RuntimeWarning")
+def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message, expected_wkt):
+    if on_invalid == "fix" and not SHAPELY_GE_21:
+        pytest.skip("on_invalid=fix not available for Shapely < 2.1")
+
     if on_invalid == "raise":
         handler = pytest.raises(shapely.errors.GEOSException, match=message)
     elif on_invalid == "warn":
         handler = pytest.warns(match=message)
-    elif on_invalid
+    elif on_invalid in ("fix", "ignore"):
         handler = contextlib.nullcontext()
     else:
         raise ValueError(f"unknown value for on_invalid: {on_invalid}")
@@ -1744,7 +1942,7 @@ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
             "properties": {},
             "geometry": {
                 "type": "Polygon",
-                "coordinates": [ [ [0, 0], [0,
+                "coordinates": [ [ [0, 0], [0, 1] ] ]
             }
         }
     ]
@@ -1760,7 +1958,10 @@ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
             use_arrow=use_arrow,
             on_invalid=on_invalid,
         )
-
+        if expected_wkt is None:
+            assert df.geometry.iloc[0] is None
+        else:
+            assert df.geometry.iloc[0].wkt == expected_wkt


 def test_read_multisurface(multisurface_file, use_arrow):
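The two hunks above add `on_invalid="fix"` alongside the existing "warn", "raise" and "ignore" modes: with Shapely >= 2.1 an unclosed ring is repaired instead of being returned as None. A sketch (file name illustrative):

    from pyogrio import read_dataframe

    df = read_dataframe("invalid_ring.geojson", on_invalid="fix")
    # e.g. an unclosed two-point ring comes back as "POLYGON ((0 0, 0 1, 0 0))"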
@@ -1792,6 +1993,10 @@ def test_read_dataset_kwargs(nested_geojson_file, use_arrow):
         geometry=[shapely.Point(0, 0)],
         crs="EPSG:4326",
     )
+    if GDAL_GE_311 and use_arrow:
+        # GDAL 3.11 started to use json extension type, which is not yet handled
+        # correctly in the arrow->pandas conversion (using object instead of str dtype)
+        expected["intermediate_level"] = expected["intermediate_level"].astype(object)

     assert_geodataframe_equal(df, expected)

@@ -1837,7 +2042,7 @@ def test_write_nullable_dtypes(tmp_path, use_arrow):
     expected["col2"] = expected["col2"].astype("float64")
     expected["col3"] = expected["col3"].astype("float32")
     expected["col4"] = expected["col4"].astype("float64")
-    expected["col5"] = expected["col5"].astype(
+    expected["col5"] = expected["col5"].astype("str")
     expected.loc[1, "col5"] = None  # pandas converts to pd.NA on line above
     assert_geodataframe_equal(output_gdf, expected)

@@ -2160,7 +2365,10 @@ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):

     if use_arrow:
         # pyarrow cannot decode column name with incorrect encoding
-        with pytest.raises(
+        with pytest.raises(
+            DataSourceError,
+            match="The file being read is not encoded in UTF-8; please use_arrow=False",
+        ):
             read_dataframe(output_path, use_arrow=True)
     else:
         bad = read_dataframe(output_path, use_arrow=False)
@@ -2257,7 +2465,7 @@ def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
     if "LIBKML" in list_drivers():
         # test appending to the existing file only if LIBKML is available
         # as it appears to fall back on LIBKML driver when appending.
-        points_append = [Point(
+        points_append = [Point(7, 8), Point(9, 10), Point(11, 12)]
         gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")

         write_dataframe(
pyogrio/tests/test_path.py
CHANGED
@@ -33,10 +33,20 @@ def change_cwd(path):
     [
         # local file paths that should be passed through as is
         ("data.gpkg", "data.gpkg"),
+        ("data.gpkg.zip", "data.gpkg.zip"),
+        ("data.shp.zip", "data.shp.zip"),
         (Path("data.gpkg"), "data.gpkg"),
+        (Path("data.gpkg.zip"), "data.gpkg.zip"),
+        (Path("data.shp.zip"), "data.shp.zip"),
         ("/home/user/data.gpkg", "/home/user/data.gpkg"),
+        ("/home/user/data.gpkg.zip", "/home/user/data.gpkg.zip"),
+        ("/home/user/data.shp.zip", "/home/user/data.shp.zip"),
         (r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"),
+        (r"C:\User\Documents\data.gpkg.zip", r"C:\User\Documents\data.gpkg.zip"),
+        (r"C:\User\Documents\data.shp.zip", r"C:\User\Documents\data.shp.zip"),
         ("file:///home/user/data.gpkg", "/home/user/data.gpkg"),
+        ("file:///home/user/data.gpkg.zip", "/home/user/data.gpkg.zip"),
+        ("file:///home/user/data.shp.zip", "/home/user/data.shp.zip"),
         ("/home/folder # with hash/data.gpkg", "/home/folder # with hash/data.gpkg"),
         # cloud URIs
         ("https://testing/data.gpkg", "/vsicurl/https://testing/data.gpkg"),
pyogrio/tests/test_raw_io.py
CHANGED
@@ -17,7 +17,7 @@ from pyogrio import (
     read_info,
     set_gdal_config_options,
 )
-from pyogrio._compat import HAS_PYARROW, HAS_SHAPELY
+from pyogrio._compat import GDAL_GE_37, HAS_PYARROW, HAS_SHAPELY
 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError
 from pyogrio.raw import open_arrow, read, write
 from pyogrio.tests.conftest import (
@@ -63,9 +63,10 @@ def test_read(naturalearth_lowres):
 @pytest.mark.parametrize("ext", DRIVERS)
 def test_read_autodetect_driver(tmp_path, naturalearth_lowres, ext):
     # Test all supported autodetect drivers
+    if ext == ".gpkg.zip" and not GDAL_GE_37:
+        pytest.skip(".gpkg.zip not supported for gdal < 3.7.0")
     testfile = prepare_testfile(naturalearth_lowres, dst_dir=tmp_path, ext=ext)

-    assert testfile.suffix == ext
     assert testfile.exists()
     meta, _, geometry, fields = read(testfile)

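For context, the raw-level `read` used here returns metadata and numpy arrays rather than a GeoDataFrame, and with GDAL >= 3.7 the autodetected extensions now include .gpkg.zip. A sketch (file name illustrative):

    from pyogrio.raw import read

    meta, index, geometry, field_data = read("test.gpkg.zip")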
@@ -703,6 +704,9 @@ def test_write_append(tmp_path, naturalearth_lowres, ext):
     if ext in (".geojsonl", ".geojsons") and __gdal_version__ < (3, 6, 0):
         pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")

+    if ext == ".gpkg.zip":
+        pytest.skip("Append to .gpkg.zip is not supported")
+
     meta, _, geometry, field_data = read(naturalearth_lowres)

     # coerce output layer to MultiPolygon to avoid mixed type errors
pyogrio/util.py
CHANGED
@@ -9,6 +9,8 @@ from urllib.parse import urlparse

 from pyogrio._vsi import vsimem_rmtree_toplevel as _vsimem_rmtree_toplevel

+MULTI_EXTENSIONS = (".gpkg.zip", ".shp.zip")
+

 def get_vsi_path_or_buffer(path_or_buffer):
     """Get VSI-prefixed path or bytes buffer depending on type of path_or_buffer.
@@ -68,15 +70,23 @@ def vsi_path(path: Union[str, Path]) -> str:
     # Windows drive letters (e.g. "C:\") confuse `urlparse` as they look like
     # URL schemes
     if sys.platform == "win32" and re.match("^[a-zA-Z]\\:", path):
+        # If it is not a zip file or it is a multi-extension zip file that is directly
+        # supported by a GDAL driver, return the path as is.
         if not path.split("!")[0].endswith(".zip"):
             return path
+        if path.split("!")[0].endswith(MULTI_EXTENSIONS):
+            return path

         # prefix then allow to proceed with remaining parsing
         path = f"zip://{path}"

     path, archive, scheme = _parse_uri(path)

-    if
+    if (
+        scheme
+        or archive
+        or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS))
+    ):
         return _construct_vsi_path(path, archive, scheme)

     return path
@@ -146,7 +156,10 @@ def _construct_vsi_path(path, archive, scheme) -> str:
     suffix = ""
     schemes = scheme.split("+")

-    if "zip" not in schemes and (
+    if "zip" not in schemes and (
+        archive.endswith(".zip")
+        or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS))
+    ):
         schemes.insert(0, "zip")

     if schemes:
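Net effect of the util.py changes, as a sketch (vsi_path is internal to pyogrio.util; the plain-.zip case reflects the pre-existing behaviour that the new MULTI_EXTENSIONS check carves an exception out of):

    from pyogrio.util import vsi_path

    # Multi-extension archives handled natively by a GDAL driver pass through:
    vsi_path("/home/user/data.gpkg.zip")  # -> "/home/user/data.gpkg.zip"

    # A generic .zip archive still gets the /vsizip/ prefix:
    vsi_path("/home/user/data.zip")  # -> "/vsizip//home/user/data.zip"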