pyogrio 0.10.0-cp311-cp311-manylinux_2_28_aarch64.whl → 0.11.1-cp311-cp311-manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pyogrio might be problematic.
- pyogrio/__init__.py +12 -10
- pyogrio/_compat.py +8 -0
- pyogrio/_err.cpython-311-aarch64-linux-gnu.so +0 -0
- pyogrio/_geometry.cpython-311-aarch64-linux-gnu.so +0 -0
- pyogrio/_io.cpython-311-aarch64-linux-gnu.so +0 -0
- pyogrio/_ogr.cpython-311-aarch64-linux-gnu.so +0 -0
- pyogrio/_version.py +3 -3
- pyogrio/_vsi.cpython-311-aarch64-linux-gnu.so +0 -0
- pyogrio/gdal_data/GDAL-targets-release.cmake +3 -3
- pyogrio/gdal_data/GDAL-targets.cmake +2 -2
- pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
- pyogrio/gdal_data/gdalinfo_output.schema.json +3 -3
- pyogrio/gdal_data/gdaltileindex.xsd +1 -17
- pyogrio/gdal_data/gdalvrt.xsd +48 -41
- pyogrio/gdal_data/nitf_spec.xml +1 -17
- pyogrio/gdal_data/nitf_spec.xsd +1 -17
- pyogrio/gdal_data/ogrvrt.xsd +1 -17
- pyogrio/gdal_data/osmconf.ini +3 -1
- pyogrio/gdal_data/pdfcomposition.xsd +1 -17
- pyogrio/gdal_data/template_tiles.mapml +28 -0
- pyogrio/gdal_data/vcpkg.spdx.json +32 -27
- pyogrio/gdal_data/vcpkg_abi_info.txt +27 -26
- pyogrio/gdal_data/vdv452.xml +1 -17
- pyogrio/gdal_data/vdv452.xsd +1 -17
- pyogrio/geopandas.py +91 -43
- pyogrio/proj_data/ITRF2014 +1 -1
- pyogrio/proj_data/ITRF2020 +91 -0
- pyogrio/proj_data/proj-config-version.cmake +3 -3
- pyogrio/proj_data/proj-config.cmake +1 -1
- pyogrio/proj_data/proj-targets.cmake +3 -3
- pyogrio/proj_data/proj.db +0 -0
- pyogrio/proj_data/proj.ini +11 -3
- pyogrio/proj_data/proj4-targets.cmake +3 -3
- pyogrio/proj_data/usage +7 -2
- pyogrio/proj_data/vcpkg.spdx.json +27 -22
- pyogrio/proj_data/vcpkg_abi_info.txt +15 -14
- pyogrio/tests/conftest.py +8 -0
- pyogrio/tests/test_arrow.py +3 -0
- pyogrio/tests/test_core.py +8 -4
- pyogrio/tests/test_geopandas_io.py +270 -45
- pyogrio/tests/test_path.py +10 -0
- pyogrio/tests/test_raw_io.py +6 -2
- pyogrio/util.py +15 -2
- {pyogrio-0.10.0.dist-info → pyogrio-0.11.1.dist-info}/METADATA +32 -37
- {pyogrio-0.10.0.dist-info → pyogrio-0.11.1.dist-info}/RECORD +202 -200
- {pyogrio-0.10.0.dist-info → pyogrio-0.11.1.dist-info}/WHEEL +1 -1
- pyogrio.libs/{libgdal-b0847c7b.so.35.3.9.1 → libgdal-3af0c888.so.36.3.10.3} +0 -0
- {pyogrio-0.10.0.dist-info → pyogrio-0.11.1.dist-info/licenses}/LICENSE +0 -0
- {pyogrio-0.10.0.dist-info → pyogrio-0.11.1.dist-info}/top_level.txt +0 -0
pyogrio/tests/test_geopandas_io.py
CHANGED

@@ -12,10 +12,20 @@ from pyogrio import (
     list_drivers,
     list_layers,
     read_info,
+    set_gdal_config_options,
     vsi_listtree,
     vsi_unlink,
 )
-from pyogrio._compat import
+from pyogrio._compat import (
+    GDAL_GE_37,
+    GDAL_GE_311,
+    GDAL_GE_352,
+    HAS_ARROW_WRITE_API,
+    HAS_PYPROJ,
+    PANDAS_GE_15,
+    PANDAS_GE_30,
+    SHAPELY_GE_21,
+)
 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
 from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe
 from pyogrio.raw import (
@@ -93,8 +103,20 @@ def spatialite_available(path):
         return False
 
 
-@pytest.mark.parametrize(
-
+@pytest.mark.parametrize(
+    "encoding, arrow",
+    [
+        ("utf-8", False),
+        pytest.param("utf-8", True, marks=requires_pyarrow_api),
+        ("cp1252", False),
+        (None, False),
+    ],
+)
+def test_read_csv_encoding(tmp_path, encoding, arrow):
+    """ "Test reading CSV files with different encodings.
+
+    Arrow only supports utf-8 encoding.
+    """
     # Write csv test file. Depending on the os this will be written in a different
     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
     csv_path = tmp_path / "test.csv"
@@ -105,7 +127,7 @@ def test_read_csv_encoding(tmp_path, encoding):
     # Read csv. The data should be read with the same default encoding as the csv file
     # was written in, but should have been converted to utf-8 in the dataframe returned.
     # Hence, the asserts below, with strings in utf-8, be OK.
-    df = read_dataframe(csv_path, encoding=encoding)
+    df = read_dataframe(csv_path, encoding=encoding, use_arrow=arrow)
 
     assert len(df) == 1
     assert df.columns.tolist() == ["näme", "city"]
@@ -117,19 +139,29 @@ def test_read_csv_encoding(tmp_path, encoding):
     locale.getpreferredencoding().upper() == "UTF-8",
     reason="test requires non-UTF-8 default platform",
 )
-def test_read_csv_platform_encoding(tmp_path):
-    """
+def test_read_csv_platform_encoding(tmp_path, use_arrow):
+    """Verify that read defaults to platform encoding; only works on Windows (CP1252).
+
+    When use_arrow=True, reading an non-UTF8 fails.
+    """
     csv_path = tmp_path / "test.csv"
     with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
         csv.write("näme,city\n")
         csv.write("Wilhelm Röntgen,Zürich\n")
 
-
+    if use_arrow:
+        with pytest.raises(
+            DataSourceError,
+            match="; please use_arrow=False",
+        ):
+            df = read_dataframe(csv_path, use_arrow=use_arrow)
+    else:
+        df = read_dataframe(csv_path, use_arrow=use_arrow)
 
-
-
-
-
+        assert len(df) == 1
+        assert df.columns.tolist() == ["näme", "city"]
+        assert df.city.tolist() == ["Zürich"]
+        assert df.näme.tolist() == ["Wilhelm Röntgen"]
 
 
 def test_read_dataframe(naturalearth_lowres_all_ext):
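The two encoding hunks above pin down the read-side behavior in 0.11.x: the `encoding` keyword is honored on the default (non-Arrow) path, while the Arrow path accepts UTF-8 only and now raises a `DataSourceError` that points at `use_arrow=False`. A minimal sketch of the same calls outside pytest; the file name and contents here are illustrative, not from the package:

```python
from pathlib import Path

from pyogrio import read_dataframe  # re-exported at the package top level

csv_path = Path("people.csv")  # hypothetical test file
csv_path.write_text("näme,city\nWilhelm Röntgen,Zürich\n", encoding="cp1252")

# Default path: the encoding keyword is honored and all values are
# converted to UTF-8 in the returned dataframe.
df = read_dataframe(csv_path, encoding="cp1252", use_arrow=False)
assert df.city.tolist() == ["Zürich"]
```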
@@ -227,11 +259,32 @@ def test_read_force_2d(tmp_path, use_arrow):
     assert not df.iloc[0].geometry.has_z
 
 
+@pytest.mark.skipif(
+    not GDAL_GE_352,
+    reason="gdal >= 3.5.2 needed to use OGR_GEOJSON_MAX_OBJ_SIZE with a float value",
+)
+def test_read_geojson_error(naturalearth_lowres_geojson, use_arrow):
+    try:
+        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01})
+        with pytest.raises(
+            DataSourceError,
+            match="Failed to read GeoJSON data; .* GeoJSON object too complex",
+        ):
+            read_dataframe(naturalearth_lowres_geojson, use_arrow=use_arrow)
+    finally:
+        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": None})
+
+
 def test_read_layer(tmp_path, use_arrow):
     filename = tmp_path / "test.gpkg"
 
     # create a multilayer GPKG
     expected1 = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
+    if use_arrow:
+        # TODO this needs to be fixed on the geopandas side (to ensure the
+        # GeoDataFrame() constructor does this), when use_arrow we already
+        # get columns Index with string dtype
+        expected1.columns = expected1.columns.astype("str")
     write_dataframe(
         expected1,
         filename,
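The new `test_read_geojson_error` doubles as a usage pattern for `set_gdal_config_options`: set an option temporarily, then always reset it by passing `None`. A sketch of that pattern in user code; the path is hypothetical and the 0.01 limit value is taken from the test:

```python
from pyogrio import read_dataframe, set_gdal_config_options

# Temporarily cap the size of GeoJSON objects GDAL will parse (float
# values for this option need GDAL >= 3.5.2, per the skipif above).
set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01})
try:
    df = read_dataframe("features.geojson")  # hypothetical path
finally:
    # None clears the option so later reads use GDAL's default again.
    set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": None})
```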
@@ -239,6 +292,8 @@ def test_read_layer(tmp_path, use_arrow):
     )
 
     expected2 = gp.GeoDataFrame(geometry=[Point(1, 1)], crs="EPSG:4326")
+    if use_arrow:
+        expected2.columns = expected2.columns.astype("str")
     write_dataframe(expected2, filename, layer="layer2", append=True)
 
     assert np.array_equal(
@@ -361,7 +416,7 @@ def test_read_null_values(tmp_path, use_arrow):
     df = read_dataframe(filename, use_arrow=use_arrow, read_geometry=False)
 
     # make sure that Null values are preserved
-    assert
+    assert df["col"].isna().all()
 
 
 def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow):
@@ -438,10 +493,17 @@ def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
     if use_arrow and naturalearth_lowres_all_ext.suffix == ".gpkg":
         # https://github.com/OSGeo/gdal/issues/8492
         request.node.add_marker(pytest.mark.xfail(reason="GDAL doesn't error for GPGK"))
-
-
-
-
+
+    if naturalearth_lowres_all_ext.suffix == ".gpkg" and __gdal_version__ >= (3, 11, 0):
+        with pytest.raises(DataLayerError, match="no such column"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+            )
+    else:
+        with pytest.raises(ValueError, match="Invalid SQL"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+            )
 
 
 def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
@@ -675,6 +737,13 @@ def test_read_skip_features(naturalearth_lowres_all_ext, use_arrow, skip_feature
     # In .geojsonl the vertices are reordered, so normalize
     is_jsons = ext == ".geojsonl"
 
+    if skip_features == 200 and not use_arrow:
+        # result is an empty dataframe, so no proper dtype inference happens
+        # for the numpy object dtype arrays
+        df[["continent", "name", "iso_a3"]] = df[
+            ["continent", "name", "iso_a3"]
+        ].astype("str")
+
     assert_geodataframe_equal(
         df,
         expected,
@@ -690,12 +759,22 @@ def test_read_negative_skip_features(naturalearth_lowres, use_arrow):
         read_dataframe(naturalearth_lowres, skip_features=-1, use_arrow=use_arrow)
 
 
+@pytest.mark.parametrize("skip_features", [0, 10, 200])
 @pytest.mark.parametrize("max_features", [10, 100])
-def test_read_max_features(
+def test_read_max_features(
+    naturalearth_lowres_all_ext, use_arrow, max_features, skip_features
+):
     ext = naturalearth_lowres_all_ext.suffix
-    expected =
+    expected = (
+        read_dataframe(naturalearth_lowres_all_ext)
+        .iloc[skip_features : skip_features + max_features]
+        .reset_index(drop=True)
+    )
     df = read_dataframe(
-        naturalearth_lowres_all_ext,
+        naturalearth_lowres_all_ext,
+        skip_features=skip_features,
+        max_features=max_features,
+        use_arrow=use_arrow,
     )
 
     assert len(df) == len(expected)
@@ -706,6 +785,13 @@ def test_read_max_features(naturalearth_lowres_all_ext, use_arrow, max_features)
     # In .geojsonl the vertices are reordered, so normalize
     is_jsons = ext == ".geojsonl"
 
+    if len(expected) == 0 and not use_arrow:
+        # for pandas >= 3, the column has string dtype but when reading it as
+        # empty result, it gets inferred as object dtype
+        expected["continent"] = expected["continent"].astype("object")
+        expected["name"] = expected["name"].astype("object")
+        expected["iso_a3"] = expected["iso_a3"].astype("object")
+
     assert_geodataframe_equal(
         df,
         expected,
@@ -943,9 +1029,20 @@ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
     assert df.iloc[0].geometry.area > area_canada
 
 
-@pytest.mark.parametrize(
-
-
+@pytest.mark.parametrize(
+    "encoding, arrow",
+    [
+        ("utf-8", False),
+        pytest.param("utf-8", True, marks=requires_arrow_write_api),
+        ("cp1252", False),
+        (None, False),
+    ],
+)
+def test_write_csv_encoding(tmp_path, encoding, arrow):
+    """Test if write_dataframe uses the default encoding correctly.
+
+    Arrow only supports utf-8 encoding.
+    """
     # Write csv test file. Depending on the os this will be written in a different
     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
     csv_path = tmp_path / "test.csv"
@@ -958,7 +1055,7 @@ def test_write_csv_encoding(tmp_path, encoding):
     # same encoding as above.
     df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
     csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
-    write_dataframe(df, csv_pyogrio_path, encoding=encoding)
+    write_dataframe(df, csv_pyogrio_path, encoding=encoding, use_arrow=arrow)
 
     # Check if the text files written both ways can be read again and give same result.
     with open(csv_path, encoding=encoding) as csv:
@@ -976,6 +1073,48 @@ def test_write_csv_encoding(tmp_path, encoding):
         assert csv_bytes == csv_pyogrio_bytes
 
 
+@pytest.mark.parametrize(
+    "ext, fid_column, fid_param_value",
+    [
+        (".gpkg", "fid", None),
+        (".gpkg", "FID", None),
+        (".sqlite", "ogc_fid", None),
+        (".gpkg", "fid_custom", "fid_custom"),
+        (".gpkg", "FID_custom", "fid_custom"),
+        (".sqlite", "ogc_fid_custom", "ogc_fid_custom"),
+    ],
+)
+@pytest.mark.requires_arrow_write_api
+def test_write_custom_fids(tmp_path, ext, fid_column, fid_param_value, use_arrow):
+    """Test to specify FIDs to save when writing to a file.
+
+    Saving custom FIDs is only supported for formats that actually store the FID, like
+    e.g. GPKG and SQLite. The fid_column name check is case-insensitive.
+
+    Typically, GDAL supports using a custom FID column for these file formats via a
+    `FID` layer creation option, which is also tested here. If `fid_param_value` is
+    specified (not None), an `fid` parameter is passed to `write_dataframe`, causing
+    GDAL to use the column name specified for the FID.
+    """
+    input_gdf = gp.GeoDataFrame(
+        {fid_column: [5]}, geometry=[shapely.Point(0, 0)], crs="epsg:4326"
+    )
+    kwargs = {}
+    if fid_param_value is not None:
+        kwargs["fid"] = fid_param_value
+    path = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, path, use_arrow=use_arrow, **kwargs)
+
+    assert path.exists()
+    output_gdf = read_dataframe(path, fid_as_index=True, use_arrow=use_arrow)
+    output_gdf = output_gdf.reset_index()
+
+    # pyogrio always sets "fid" as index name with `fid_as_index`
+    expected_gdf = input_gdf.rename(columns={fid_column: "fid"})
+    assert_geodataframe_equal(output_gdf, expected_gdf)
+
+
 @pytest.mark.parametrize("ext", ALL_EXTS)
 @pytest.mark.requires_arrow_write_api
 def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
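Based on `test_write_custom_fids` above, the write path now accepts an `fid` keyword (forwarded to the `FID` layer creation option for formats such as GPKG and SQLite) and stores the matching column as the feature ID. A sketch with names taken from the test; the output path is hypothetical:

```python
import geopandas as gp
import shapely
from pyogrio import read_dataframe, write_dataframe

gdf = gp.GeoDataFrame(
    {"fid_custom": [5]}, geometry=[shapely.Point(0, 0)], crs="epsg:4326"
)
# Ask GDAL to use "fid_custom" as the FID column (GPKG stores real FIDs).
write_dataframe(gdf, "data.gpkg", fid="fid_custom")

# Reading with fid_as_index=True exposes the stored FID; pyogrio always
# names that index "fid", regardless of the original column name.
out = read_dataframe("data.gpkg", fid_as_index=True)
assert out.index.tolist() == [5]
```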
@@ -1087,16 +1226,38 @@ def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):
 
 
 @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
+@pytest.mark.parametrize(
+    "columns, dtype",
+    [
+        ([], None),
+        (["col_int"], np.int64),
+        (["col_float"], np.float64),
+        (["col_object"], object),
+    ],
+)
 @pytest.mark.requires_arrow_write_api
-def test_write_empty_dataframe(tmp_path, ext, use_arrow):
-
+def test_write_empty_dataframe(tmp_path, ext, columns, dtype, use_arrow):
+    """Test writing dataframe with no rows.
 
+    With use_arrow, object type columns with no rows are converted to null type columns
+    by pyarrow, but null columns are not supported by GDAL. Added to test fix for #513.
+    """
+    expected = gp.GeoDataFrame(geometry=[], columns=columns, dtype=dtype, crs=4326)
     filename = tmp_path / f"test{ext}"
     write_dataframe(expected, filename, use_arrow=use_arrow)
 
     assert filename.exists()
-    df = read_dataframe(filename)
-
+    df = read_dataframe(filename, use_arrow=use_arrow)
+
+    # Check result
+    # For older pandas versions, the index is created as Object dtype but read as
+    # RangeIndex, so don't check the index dtype in that case.
+    check_index_type = True if PANDAS_GE_20 else False
+    # with pandas 3+ and reading through arrow, we preserve the string dtype
+    # (no proper dtype inference happens for the empty numpy object dtype arrays)
+    if use_arrow and dtype is object:
+        expected["col_object"] = expected["col_object"].astype("str")
+    assert_geodataframe_equal(df, expected, check_index_type=check_index_type)
 
 
 def test_write_empty_geometry(tmp_path):
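The reworked `test_write_empty_dataframe` above is the regression test for the #513 fix: with `use_arrow`, an object-dtype column with zero rows becomes a pyarrow null-typed column, which GDAL cannot store, so the writer now handles that case. A minimal sketch of the round trip, with a hypothetical output path:

```python
import geopandas as gp
from pyogrio import read_dataframe, write_dataframe

# Zero rows plus an object-dtype column: exactly the case that used to
# produce an unsupported null-typed arrow column on write (#513).
empty = gp.GeoDataFrame(geometry=[], columns=["col_object"], dtype=object, crs=4326)
write_dataframe(empty, "empty.gpkg", use_arrow=True)  # hypothetical output

assert len(read_dataframe("empty.gpkg", use_arrow=True)) == 0
```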
@@ -1116,6 +1277,28 @@ def test_write_empty_geometry(tmp_path):
     assert_geodataframe_equal(df, expected)
 
 
+@pytest.mark.requires_arrow_write_api
+def test_write_None_string_column(tmp_path, use_arrow):
+    """Test pandas object columns with all None values.
+
+    With use_arrow, such columns are converted to null type columns by pyarrow, but null
+    columns are not supported by GDAL. Added to test fix for #513.
+    """
+    gdf = gp.GeoDataFrame({"object_col": [None]}, geometry=[Point(0, 0)], crs=4326)
+    filename = tmp_path / "test.gpkg"
+
+    write_dataframe(gdf, filename, use_arrow=use_arrow)
+    assert filename.exists()
+
+    result_gdf = read_dataframe(filename, use_arrow=use_arrow)
+    if PANDAS_GE_30 and use_arrow:
+        assert result_gdf.object_col.dtype == "str"
+        gdf["object_col"] = gdf["object_col"].astype("str")
+    else:
+        assert result_gdf.object_col.dtype == object
+    assert_geodataframe_equal(result_gdf, gdf)
+
+
 @pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
 @pytest.mark.requires_arrow_write_api
 def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
@@ -1521,6 +1704,30 @@ def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
     assert df.crs.equals(expected.crs)
 
 
+@pytest.mark.parametrize("ext", [".gpkg.zip", ".shp.zip", ".shz"])
+@pytest.mark.requires_arrow_write_api
+def test_write_read_zipped_ext(tmp_path, naturalearth_lowres, ext, use_arrow):
+    """Run a basic read and write test on some extra (zipped) extensions."""
+    if ext == ".gpkg.zip" and not GDAL_GE_37:
+        pytest.skip(".gpkg.zip support requires GDAL >= 3.7")
+
+    input_gdf = read_dataframe(naturalearth_lowres)
+    output_path = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
+
+    assert output_path.exists()
+    result_gdf = read_dataframe(output_path)
+
+    geometry_types = result_gdf.geometry.type.unique()
+    if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
+        assert list(geometry_types) == ["MultiPolygon"]
+    else:
+        assert set(geometry_types) == {"MultiPolygon", "Polygon"}
+
+    assert_geodataframe_equal(result_gdf, input_gdf, check_index_type=False)
+
+
 def test_write_read_mixed_column_values(tmp_path):
     # use_arrow=True is tested separately below
     mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
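The new `test_write_read_zipped_ext` above confirms that the zipped container extensions round-trip through `write_dataframe`/`read_dataframe`. A minimal sketch with hypothetical paths (GDAL >= 3.7 is required for `.gpkg.zip`, per the skip in the test):

```python
from pyogrio import read_dataframe, write_dataframe

gdf = read_dataframe("countries.gpkg")  # hypothetical source file

# Write directly into a zipped GeoPackage; the multi-extension path is
# passed through to the GDAL driver instead of being wrapped in /vsizip/.
write_dataframe(gdf, "countries.gpkg.zip")

roundtrip = read_dataframe("countries.gpkg.zip")
assert len(roundtrip) == len(gdf)
```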
@@ -1532,11 +1739,13 @@ def test_write_read_mixed_column_values(tmp_path):
     write_dataframe(test_gdf, output_path)
     output_gdf = read_dataframe(output_path)
     assert len(test_gdf) == len(output_gdf)
-
-
-
-    else
-
+    # mixed values as object dtype are currently written as strings
+    # (but preserving nulls)
+    expected = pd.Series(
+        [str(value) if value not in (None, np.nan) else None for value in mixed_values],
+        name="mixed",
+    )
+    assert_series_equal(output_gdf["mixed"], expected)
 
 
 @requires_arrow_write_api
@@ -1569,8 +1778,8 @@ def test_write_read_null(tmp_path, use_arrow):
     assert pd.isna(result_gdf["float64"][1])
     assert pd.isna(result_gdf["float64"][2])
     assert result_gdf["object_str"][0] == "test"
-    assert result_gdf["object_str"][1]
-    assert result_gdf["object_str"][2]
+    assert pd.isna(result_gdf["object_str"][1])
+    assert pd.isna(result_gdf["object_str"][2])
 
 
 @pytest.mark.requires_arrow_write_api
@@ -1714,23 +1923,29 @@ def test_write_geometry_z_types_auto(
 
 
 @pytest.mark.parametrize(
-    "on_invalid, message",
+    "on_invalid, message, expected_wkt",
     [
         (
             "warn",
             "Invalid WKB: geometry is returned as None. IllegalArgumentException: "
-            "
+            "Points of LinearRing do not form a closed linestring",
+            None,
         ),
-        ("raise", "
-        ("ignore", None),
+        ("raise", "Points of LinearRing do not form a closed linestring", None),
+        ("ignore", None, None),
+        ("fix", None, "POLYGON ((0 0, 0 1, 0 0))"),
     ],
 )
-
+@pytest.mark.filterwarnings("ignore:Non closed ring detected:RuntimeWarning")
+def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message, expected_wkt):
+    if on_invalid == "fix" and not SHAPELY_GE_21:
+        pytest.skip("on_invalid=fix not available for Shapely < 2.1")
+
     if on_invalid == "raise":
         handler = pytest.raises(shapely.errors.GEOSException, match=message)
     elif on_invalid == "warn":
         handler = pytest.warns(match=message)
-    elif on_invalid
+    elif on_invalid in ("fix", "ignore"):
         handler = contextlib.nullcontext()
     else:
         raise ValueError(f"unknown value for on_invalid: {on_invalid}")
@@ -1744,7 +1959,7 @@ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
                 "properties": {},
                 "geometry": {
                     "type": "Polygon",
-                    "coordinates": [ [ [0, 0], [0,
+                    "coordinates": [ [ [0, 0], [0, 1] ] ]
                 }
             }
         ]
@@ -1760,7 +1975,10 @@ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message):
             use_arrow=use_arrow,
             on_invalid=on_invalid,
         )
-
+        if expected_wkt is None:
+            assert df.geometry.iloc[0] is None
+        else:
+            assert df.geometry.iloc[0].wkt == expected_wkt
 
 
 def test_read_multisurface(multisurface_file, use_arrow):
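With the `expected_wkt` column added above, the parametrization documents the new `on_invalid="fix"` mode: with Shapely >= 2.1, the unclosed ring is closed rather than the geometry being dropped to None. A sketch of the user-facing call; the file path is hypothetical and the GeoJSON content mirrors the test fixture:

```python
from pathlib import Path

from pyogrio import read_dataframe

# A polygon whose ring is not closed, as in the test fixture above.
Path("invalid.geojson").write_text(
    '{"type": "FeatureCollection", "features": [{"type": "Feature", '
    '"properties": {}, "geometry": {"type": "Polygon", '
    '"coordinates": [[[0, 0], [0, 1]]]}}]}'
)

# With Shapely >= 2.1, on_invalid="fix" closes the ring; GDAL may still
# emit a "Non closed ring detected" RuntimeWarning along the way.
df = read_dataframe("invalid.geojson", on_invalid="fix")
assert df.geometry.iloc[0].wkt == "POLYGON ((0 0, 0 1, 0 0))"
```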
@@ -1792,6 +2010,10 @@ def test_read_dataset_kwargs(nested_geojson_file, use_arrow):
         geometry=[shapely.Point(0, 0)],
         crs="EPSG:4326",
     )
+    if GDAL_GE_311 and use_arrow:
+        # GDAL 3.11 started to use json extension type, which is not yet handled
+        # correctly in the arrow->pandas conversion (using object instead of str dtype)
+        expected["intermediate_level"] = expected["intermediate_level"].astype(object)
 
     assert_geodataframe_equal(df, expected)
 
@@ -1837,7 +2059,7 @@ def test_write_nullable_dtypes(tmp_path, use_arrow):
     expected["col2"] = expected["col2"].astype("float64")
     expected["col3"] = expected["col3"].astype("float32")
     expected["col4"] = expected["col4"].astype("float64")
-    expected["col5"] = expected["col5"].astype(
+    expected["col5"] = expected["col5"].astype("str")
     expected.loc[1, "col5"] = None  # pandas converts to pd.NA on line above
     assert_geodataframe_equal(output_gdf, expected)
@@ -2160,7 +2382,10 @@ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):
 
     if use_arrow:
         # pyarrow cannot decode column name with incorrect encoding
-        with pytest.raises(
+        with pytest.raises(
+            DataSourceError,
+            match="The file being read is not encoded in UTF-8; please use_arrow=False",
+        ):
             read_dataframe(output_path, use_arrow=True)
     else:
         bad = read_dataframe(output_path, use_arrow=False)
@@ -2257,7 +2482,7 @@ def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
     if "LIBKML" in list_drivers():
         # test appending to the existing file only if LIBKML is available
         # as it appears to fall back on LIBKML driver when appending.
-        points_append = [Point(
+        points_append = [Point(7, 8), Point(9, 10), Point(11, 12)]
         gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
 
         write_dataframe(
pyogrio/tests/test_path.py
CHANGED

@@ -33,10 +33,20 @@ def change_cwd(path):
     [
         # local file paths that should be passed through as is
         ("data.gpkg", "data.gpkg"),
+        ("data.gpkg.zip", "data.gpkg.zip"),
+        ("data.shp.zip", "data.shp.zip"),
         (Path("data.gpkg"), "data.gpkg"),
+        (Path("data.gpkg.zip"), "data.gpkg.zip"),
+        (Path("data.shp.zip"), "data.shp.zip"),
         ("/home/user/data.gpkg", "/home/user/data.gpkg"),
+        ("/home/user/data.gpkg.zip", "/home/user/data.gpkg.zip"),
+        ("/home/user/data.shp.zip", "/home/user/data.shp.zip"),
         (r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"),
+        (r"C:\User\Documents\data.gpkg.zip", r"C:\User\Documents\data.gpkg.zip"),
+        (r"C:\User\Documents\data.shp.zip", r"C:\User\Documents\data.shp.zip"),
         ("file:///home/user/data.gpkg", "/home/user/data.gpkg"),
+        ("file:///home/user/data.gpkg.zip", "/home/user/data.gpkg.zip"),
+        ("file:///home/user/data.shp.zip", "/home/user/data.shp.zip"),
         ("/home/folder # with hash/data.gpkg", "/home/folder # with hash/data.gpkg"),
         # cloud URIs
         ("https://testing/data.gpkg", "/vsicurl/https://testing/data.gpkg"),
pyogrio/tests/test_raw_io.py
CHANGED

@@ -17,7 +17,7 @@ from pyogrio import (
     read_info,
     set_gdal_config_options,
 )
-from pyogrio._compat import HAS_PYARROW, HAS_SHAPELY
+from pyogrio._compat import GDAL_GE_37, HAS_PYARROW, HAS_SHAPELY
 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError
 from pyogrio.raw import open_arrow, read, write
 from pyogrio.tests.conftest import (
@@ -63,9 +63,10 @@ def test_read(naturalearth_lowres):
 @pytest.mark.parametrize("ext", DRIVERS)
 def test_read_autodetect_driver(tmp_path, naturalearth_lowres, ext):
     # Test all supported autodetect drivers
+    if ext == ".gpkg.zip" and not GDAL_GE_37:
+        pytest.skip(".gpkg.zip not supported for gdal < 3.7.0")
     testfile = prepare_testfile(naturalearth_lowres, dst_dir=tmp_path, ext=ext)
 
-    assert testfile.suffix == ext
     assert testfile.exists()
     meta, _, geometry, fields = read(testfile)
 
@@ -703,6 +704,9 @@ def test_write_append(tmp_path, naturalearth_lowres, ext):
     if ext in (".geojsonl", ".geojsons") and __gdal_version__ < (3, 6, 0):
         pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")
 
+    if ext == ".gpkg.zip":
+        pytest.skip("Append to .gpkg.zip is not supported")
+
     meta, _, geometry, field_data = read(naturalearth_lowres)
 
     # coerce output layer to MultiPolygon to avoid mixed type errors
pyogrio/util.py
CHANGED

@@ -9,6 +9,8 @@ from urllib.parse import urlparse
 
 from pyogrio._vsi import vsimem_rmtree_toplevel as _vsimem_rmtree_toplevel
 
+MULTI_EXTENSIONS = (".gpkg.zip", ".shp.zip")
+
 
 def get_vsi_path_or_buffer(path_or_buffer):
     """Get VSI-prefixed path or bytes buffer depending on type of path_or_buffer.
@@ -68,15 +70,23 @@ def vsi_path(path: Union[str, Path]) -> str:
     # Windows drive letters (e.g. "C:\") confuse `urlparse` as they look like
     # URL schemes
     if sys.platform == "win32" and re.match("^[a-zA-Z]\\:", path):
+        # If it is not a zip file or it is multi-extension zip file that is directly
+        # supported by a GDAL driver, return the path as is.
         if not path.split("!")[0].endswith(".zip"):
             return path
+        if path.split("!")[0].endswith(MULTI_EXTENSIONS):
+            return path
 
         # prefix then allow to proceed with remaining parsing
         path = f"zip://{path}"
 
     path, archive, scheme = _parse_uri(path)
 
-    if
+    if (
+        scheme
+        or archive
+        or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS))
+    ):
         return _construct_vsi_path(path, archive, scheme)
 
     return path
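The effect of the new `MULTI_EXTENSIONS` check on `vsi_path` is easiest to see on concrete inputs; a sketch using the expectations from the `test_path.py` parametrization above (`vsi_path` lives in `pyogrio.util`):

```python
from pyogrio.util import vsi_path

# Multi-extension archives (.gpkg.zip, .shp.zip) are opened directly by
# the matching GDAL driver, so the path now passes through unchanged ...
assert vsi_path("data.gpkg.zip") == "data.gpkg.zip"
assert vsi_path("data.shp.zip") == "data.shp.zip"

# ... while cloud URIs still get their VSI prefix, as before.
assert vsi_path("https://testing/data.gpkg") == "/vsicurl/https://testing/data.gpkg"
```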
@@ -146,7 +156,10 @@ def _construct_vsi_path(path, archive, scheme) -> str:
         suffix = ""
     schemes = scheme.split("+")
 
-    if "zip" not in schemes and (
+    if "zip" not in schemes and (
+        archive.endswith(".zip")
+        or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS))
+    ):
         schemes.insert(0, "zip")
 
     if schemes: