pyogrio 0.9.0-cp311-cp311-macosx_12_0_arm64.whl → 0.11.0-cp311-cp311-macosx_12_0_arm64.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- pyogrio/.dylibs/{libgdal.34.3.8.5.dylib → libgdal.36.3.10.3.dylib} +0 -0
- pyogrio/__init__.py +28 -21
- pyogrio/_compat.py +15 -1
- pyogrio/_env.py +4 -6
- pyogrio/_err.cpython-311-darwin.so +0 -0
- pyogrio/_geometry.cpython-311-darwin.so +0 -0
- pyogrio/_io.cpython-311-darwin.so +0 -0
- pyogrio/_ogr.cpython-311-darwin.so +0 -0
- pyogrio/_version.py +3 -3
- pyogrio/_vsi.cpython-311-darwin.so +0 -0
- pyogrio/core.py +86 -20
- pyogrio/errors.py +9 -16
- pyogrio/gdal_data/GDAL-targets-release.cmake +3 -3
- pyogrio/gdal_data/GDAL-targets.cmake +2 -2
- pyogrio/gdal_data/GDALConfig.cmake +0 -1
- pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
- pyogrio/gdal_data/MM_m_idofic.csv +321 -0
- pyogrio/gdal_data/gdalinfo_output.schema.json +3 -3
- pyogrio/gdal_data/gdaltileindex.xsd +253 -0
- pyogrio/gdal_data/gdalvrt.xsd +178 -63
- pyogrio/gdal_data/nitf_spec.xml +1 -17
- pyogrio/gdal_data/nitf_spec.xsd +1 -17
- pyogrio/gdal_data/ogrinfo_output.schema.json +23 -0
- pyogrio/gdal_data/ogrvrt.xsd +4 -17
- pyogrio/gdal_data/osmconf.ini +3 -1
- pyogrio/gdal_data/pci_datum.txt +222 -155
- pyogrio/gdal_data/pci_ellips.txt +90 -38
- pyogrio/gdal_data/pdfcomposition.xsd +1 -17
- pyogrio/gdal_data/vcpkg.spdx.json +29 -24
- pyogrio/gdal_data/vcpkg_abi_info.txt +31 -30
- pyogrio/gdal_data/vdv452.xml +1 -17
- pyogrio/gdal_data/vdv452.xsd +1 -17
- pyogrio/geopandas.py +122 -66
- pyogrio/proj_data/ITRF2014 +1 -1
- pyogrio/proj_data/ITRF2020 +91 -0
- pyogrio/proj_data/proj-config-version.cmake +2 -2
- pyogrio/proj_data/proj-config.cmake +1 -1
- pyogrio/proj_data/proj-targets.cmake +3 -3
- pyogrio/proj_data/proj.db +0 -0
- pyogrio/proj_data/proj.ini +11 -3
- pyogrio/proj_data/proj4-targets.cmake +3 -3
- pyogrio/proj_data/projjson.schema.json +1 -1
- pyogrio/proj_data/usage +7 -2
- pyogrio/proj_data/vcpkg.spdx.json +27 -22
- pyogrio/proj_data/vcpkg_abi_info.txt +18 -17
- pyogrio/raw.py +46 -30
- pyogrio/tests/conftest.py +214 -12
- pyogrio/tests/fixtures/README.md +32 -13
- pyogrio/tests/fixtures/curve.gpkg +0 -0
- pyogrio/tests/fixtures/{test_multisurface.gpkg → curvepolygon.gpkg} +0 -0
- pyogrio/tests/fixtures/line_zm.gpkg +0 -0
- pyogrio/tests/fixtures/multisurface.gpkg +0 -0
- pyogrio/tests/test_arrow.py +181 -24
- pyogrio/tests/test_core.py +170 -76
- pyogrio/tests/test_geopandas_io.py +483 -135
- pyogrio/tests/test_path.py +39 -17
- pyogrio/tests/test_raw_io.py +170 -55
- pyogrio/tests/test_util.py +56 -0
- pyogrio/util.py +69 -32
- pyogrio-0.11.0.dist-info/METADATA +124 -0
- {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info}/RECORD +64 -78
- {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info}/WHEEL +2 -1
- {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info/licenses}/LICENSE +1 -1
- pyogrio/_err.pxd +0 -4
- pyogrio/_err.pyx +0 -250
- pyogrio/_geometry.pxd +0 -4
- pyogrio/_geometry.pyx +0 -129
- pyogrio/_io.pxd +0 -0
- pyogrio/_io.pyx +0 -2742
- pyogrio/_ogr.pxd +0 -444
- pyogrio/_ogr.pyx +0 -346
- pyogrio/_vsi.pxd +0 -4
- pyogrio/_vsi.pyx +0 -140
- pyogrio/arrow_bridge.h +0 -115
- pyogrio/gdal_data/bag_template.xml +0 -201
- pyogrio/gdal_data/gmlasconf.xml +0 -169
- pyogrio/gdal_data/gmlasconf.xsd +0 -1066
- pyogrio/gdal_data/netcdf_config.xsd +0 -143
- pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
- pyogrio/tests/fixtures/test_datetime.geojson +0 -7
- pyogrio/tests/fixtures/test_datetime_tz.geojson +0 -8
- pyogrio/tests/fixtures/test_fgdb.gdb.zip +0 -0
- pyogrio/tests/fixtures/test_nested.geojson +0 -18
- pyogrio/tests/fixtures/test_ogr_types_list.geojson +0 -12
- pyogrio-0.9.0.dist-info/METADATA +0 -100
- {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info}/top_level.txt +0 -0
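
The largest change in this release lands in `pyogrio/tests/test_geopandas_io.py`, whose diff follows (lines the diff viewer truncated are marked with `…`). The new imports at the top of that file show 0.11.0 exercising pyogrio's GDAL `/vsimem/` in-memory filesystem helpers, `vsi_listtree` and `vsi_unlink`. A minimal round-trip sketch of those helpers, assuming geopandas is installed (the `/vsimem/example.gpkg` path is illustrative, not from the diff):

```python
import geopandas as gp
from shapely.geometry import Point

from pyogrio import vsi_listtree, vsi_unlink
from pyogrio.geopandas import read_dataframe, write_dataframe

gdf = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")

mem_path = "/vsimem/example.gpkg"  # a path in GDAL's in-memory filesystem
try:
    write_dataframe(gdf, mem_path)  # write a GPKG into /vsimem/ instead of to disk
    assert len(read_dataframe(mem_path)) == 1  # read it back from memory
    print(vsi_listtree("/vsimem/"))  # list entries under /vsimem/
finally:
    vsi_unlink(mem_path)  # delete the in-memory file
```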
pyogrio/tests/test_geopandas_io.py (0.9.0 → 0.11.0):

@@ -1,14 +1,33 @@
 import contextlib
+import locale
+import warnings
 from datetime import datetime
 from io import BytesIO
-import …
+from zipfile import ZipFile

 import numpy as np
-import pytest

-from pyogrio import …
+from pyogrio import (
+    __gdal_version__,
+    list_drivers,
+    list_layers,
+    read_info,
+    set_gdal_config_options,
+    vsi_listtree,
+    vsi_unlink,
+)
+from pyogrio._compat import (
+    GDAL_GE_37,
+    GDAL_GE_311,
+    GDAL_GE_352,
+    HAS_ARROW_WRITE_API,
+    HAS_PYPROJ,
+    PANDAS_GE_15,
+    PANDAS_GE_30,
+    SHAPELY_GE_21,
+)
 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
-from pyogrio.geopandas import read_dataframe, write_dataframe
+from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe
 from pyogrio.raw import (
     DRIVERS_NO_MIXED_DIMENSIONS,
     DRIVERS_NO_MIXED_SINGLE_MULTI,
@@ -16,27 +35,29 @@ from pyogrio.raw import (
 from pyogrio.tests.conftest import (
     ALL_EXTS,
     DRIVERS,
-    …
+    START_FID,
     requires_arrow_write_api,
     requires_gdal_geos,
+    requires_pyarrow_api,
+    requires_pyproj,
 )
-from pyogrio._compat import PANDAS_GE_15, HAS_ARROW_WRITE_API

-
-import pandas as pd
-from pandas.testing import (
-    assert_frame_equal,
-    assert_index_equal,
-    assert_series_equal,
-)
+import pytest

+try:
     import geopandas as gp
+    import pandas as pd
     from geopandas.array import from_wkt
-    from geopandas.testing import assert_geodataframe_equal

     import shapely  # if geopandas is present, shapely is expected to be present
     from shapely.geometry import Point

+    from geopandas.testing import assert_geodataframe_equal
+    from pandas.testing import (
+        assert_index_equal,
+        assert_series_equal,
+    )
+
 except ImportError:
     pass

@@ -82,8 +103,20 @@ def spatialite_available(path):
         return False


-@pytest.mark.parametrize(…
-…
+@pytest.mark.parametrize(
+    "encoding, arrow",
+    [
+        ("utf-8", False),
+        pytest.param("utf-8", True, marks=requires_pyarrow_api),
+        ("cp1252", False),
+        (None, False),
+    ],
+)
+def test_read_csv_encoding(tmp_path, encoding, arrow):
+    """ "Test reading CSV files with different encodings.
+
+    Arrow only supports utf-8 encoding.
+    """
     # Write csv test file. Depending on the os this will be written in a different
     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
     csv_path = tmp_path / "test.csv"
@@ -94,7 +127,7 @@ def test_read_csv_encoding(tmp_path, encoding):
     # Read csv. The data should be read with the same default encoding as the csv file
     # was written in, but should have been converted to utf-8 in the dataframe returned.
     # Hence, the asserts below, with strings in utf-8, be OK.
-    df = read_dataframe(csv_path, encoding=encoding)
+    df = read_dataframe(csv_path, encoding=encoding, use_arrow=arrow)

     assert len(df) == 1
     assert df.columns.tolist() == ["näme", "city"]
@@ -106,25 +139,36 @@ def test_read_csv_encoding(tmp_path, encoding):
     locale.getpreferredencoding().upper() == "UTF-8",
     reason="test requires non-UTF-8 default platform",
 )
-def test_read_csv_platform_encoding(tmp_path):
-    """…
+def test_read_csv_platform_encoding(tmp_path, use_arrow):
+    """Verify that read defaults to platform encoding; only works on Windows (CP1252).
+
+    When use_arrow=True, reading an non-UTF8 fails.
+    """
     csv_path = tmp_path / "test.csv"
     with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
         csv.write("näme,city\n")
         csv.write("Wilhelm Röntgen,Zürich\n")

-…
+    if use_arrow:
+        with pytest.raises(
+            DataSourceError,
+            match="; please use_arrow=False",
+        ):
+            df = read_dataframe(csv_path, use_arrow=use_arrow)
+    else:
+        df = read_dataframe(csv_path, use_arrow=use_arrow)

-…
-…
-…
-…
+    assert len(df) == 1
+    assert df.columns.tolist() == ["näme", "city"]
+    assert df.city.tolist() == ["Zürich"]
+    assert df.näme.tolist() == ["Wilhelm Röntgen"]


 def test_read_dataframe(naturalearth_lowres_all_ext):
     df = read_dataframe(naturalearth_lowres_all_ext)

-…
+    if HAS_PYPROJ:
+        assert df.crs == "EPSG:4326"
     assert len(df) == 177
     assert df.columns.tolist() == [
         "pop_est",
@@ -142,14 +186,13 @@ def test_read_dataframe_vsi(naturalearth_lowres_vsi, use_arrow):


 @pytest.mark.parametrize(
-    "columns, fid_as_index, exp_len", [(None, False, …
+    "columns, fid_as_index, exp_len", [(None, False, 3), ([], True, 3), ([], False, 0)]
 )
 def test_read_layer_without_geometry(
-    …
+    no_geometry_file, columns, fid_as_index, use_arrow, exp_len
 ):
     result = read_dataframe(
-        …
-        layer="basetable",
+        no_geometry_file,
         columns=columns,
         fid_as_index=fid_as_index,
         use_arrow=use_arrow,
@@ -195,38 +238,85 @@ def test_read_no_geometry_no_columns_no_fids(naturalearth_lowres, use_arrow):
     )


-def test_read_force_2d(…
-…
-…
-…
-…
-…
+def test_read_force_2d(tmp_path, use_arrow):
+    filename = tmp_path / "test.gpkg"
+
+    # create a GPKG with 3D point values
+    expected = gp.GeoDataFrame(
+        geometry=[Point(0, 0, 0), Point(1, 1, 0)], crs="EPSG:4326"
+    )
+    write_dataframe(expected, filename)
+
+    df = read_dataframe(filename)
+    assert df.iloc[0].geometry.has_z
+
+    df = read_dataframe(
+        filename,
+        force_2d=True,
+        max_features=1,
+        use_arrow=use_arrow,
+    )
+    assert not df.iloc[0].geometry.has_z
+
+
+@pytest.mark.skipif(
+    not GDAL_GE_352,
+    reason="gdal >= 3.5.2 needed to use OGR_GEOJSON_MAX_OBJ_SIZE with a float value",
+)
+def test_read_geojson_error(naturalearth_lowres_geojson, use_arrow):
+    try:
+        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01})
+        with pytest.raises(
+            DataSourceError,
+            match="Failed to read GeoJSON data; .* GeoJSON object too complex",
+        ):
+            read_dataframe(naturalearth_lowres_geojson, use_arrow=use_arrow)
+    finally:
+        set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": None})

-    df = read_dataframe(
-        test_fgdb_vsi,
-        layer="test_lines",
-        force_2d=True,
-        max_features=1,
-        use_arrow=use_arrow,
-    )
-    assert not df.iloc[0].geometry.has_z

+def test_read_layer(tmp_path, use_arrow):
+    filename = tmp_path / "test.gpkg"

-…
-…
-…
-…
-…
+    # create a multilayer GPKG
+    expected1 = gp.GeoDataFrame(geometry=[Point(0, 0)], crs="EPSG:4326")
+    if use_arrow:
+        # TODO this needs to be fixed on the geopandas side (to ensure the
+        # GeoDataFrame() constructor does this), when use_arrow we already
+        # get columns Index with string dtype
+        expected1.columns = expected1.columns.astype("str")
+    write_dataframe(
+        expected1,
+        filename,
+        layer="layer1",
+    )

-…
-…
-…
-…
+    expected2 = gp.GeoDataFrame(geometry=[Point(1, 1)], crs="EPSG:4326")
+    if use_arrow:
+        expected2.columns = expected2.columns.astype("str")
+    write_dataframe(expected2, filename, layer="layer2", append=True)

-…
+    assert np.array_equal(
+        list_layers(filename), [["layer1", "Point"], ["layer2", "Point"]]
+    )
+
+    kwargs = {"use_arrow": use_arrow, "max_features": 1}
+
+    # The first layer is read by default, which will warn when there are multiple
+    # layers
+    with pytest.warns(UserWarning, match="More than one layer found"):
+        df = read_dataframe(filename, **kwargs)
+
+    assert_geodataframe_equal(df, expected1)
+
+    # Reading a specific layer by name should return that layer.
     # Detected here by a known column.
-    df = read_dataframe(
-        …
+    df = read_dataframe(filename, layer="layer2", **kwargs)
+    assert_geodataframe_equal(df, expected2)
+
+    # Reading a specific layer by index should return that layer
+    df = read_dataframe(filename, layer=1, **kwargs)
+    assert_geodataframe_equal(df, expected2)


 def test_read_layer_invalid(naturalearth_lowres_all_ext, use_arrow):
@@ -234,22 +324,19 @@ def test_read_layer_invalid(naturalearth_lowres_all_ext, use_arrow):
         read_dataframe(naturalearth_lowres_all_ext, layer="wrong", use_arrow=use_arrow)


-…
-…
-    df = read_dataframe(
-        test_fgdb_vsi, layer="test_lines", use_arrow=use_arrow, max_features=1
-    )
+def test_read_datetime(datetime_file, use_arrow):
+    df = read_dataframe(datetime_file, use_arrow=use_arrow)
     if PANDAS_GE_20:
         # starting with pandas 2.0, it preserves the passed datetime resolution
-        assert df.…
+        assert df.col.dtype.name == "datetime64[ms]"
     else:
-        assert df.…
+        assert df.col.dtype.name == "datetime64[ns]"


 @pytest.mark.filterwarnings("ignore: Non-conformant content for record 1 in column ")
 @pytest.mark.requires_arrow_write_api
-def test_read_datetime_tz(…
-    df = read_dataframe(…
+def test_read_datetime_tz(datetime_tz_file, tmp_path, use_arrow):
+    df = read_dataframe(datetime_tz_file)
     # Make the index non-consecutive to test this case as well. Added for issue
     # https://github.com/geopandas/pyogrio/issues/324
     df = df.set_index(np.array([0, 2]))
@@ -319,14 +406,17 @@ def test_read_write_datetime_tz_with_nulls(tmp_path, use_arrow):
     assert_geodataframe_equal(df, result)


-def test_read_null_values(…
-…
-…
-…
+def test_read_null_values(tmp_path, use_arrow):
+    filename = tmp_path / "test_null_values_no_geometry.gpkg"
+
+    # create a GPKG with no geometries and only null values
+    expected = pd.DataFrame({"col": [None, None]})
+    write_dataframe(expected, filename)
+
+    df = read_dataframe(filename, use_arrow=use_arrow, read_geometry=False)

     # make sure that Null values are preserved
-    assert df.…
-    assert df.loc[df.SEGMENT_NAME.isnull()].SEGMENT_NAME.iloc[0] is None
+    assert df["col"].isna().all()


 def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow):
@@ -344,12 +434,9 @@ def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow):
         fid_as_index=True,
         **kwargs,
     )
-…
-…
-…
-    else:
-        # File format where fid starts at 0
-        assert_index_equal(df.index, pd.Index([2, 3], name="fid"))
+    fids_expected = pd.Index([2, 3], name="fid")
+    fids_expected += START_FID[naturalearth_lowres_all_ext.suffix]
+    assert_index_equal(df.index, fids_expected)


 def test_read_fid_as_index_only(naturalearth_lowres, use_arrow):
@@ -406,10 +493,17 @@ def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
     if use_arrow and naturalearth_lowres_all_ext.suffix == ".gpkg":
         # https://github.com/OSGeo/gdal/issues/8492
         request.node.add_marker(pytest.mark.xfail(reason="GDAL doesn't error for GPGK"))
-…
-…
-…
-…
+
+    if naturalearth_lowres_all_ext.suffix == ".gpkg" and __gdal_version__ >= (3, 11, 0):
+        with pytest.raises(DataLayerError, match="no such column"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+            )
+    else:
+        with pytest.raises(ValueError, match="Invalid SQL"):
+            read_dataframe(
+                naturalearth_lowres_all_ext, use_arrow=use_arrow, where="invalid"
+            )


 def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
@@ -605,17 +699,22 @@ def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext):
     assert len(df) == 1


-def test_read_fids_force_2d(…
-    …
-        UserWarning, match=r"Measured \(M\) geometry types are not supported"
-    ):
-        df = read_dataframe(test_fgdb_vsi, layer="test_lines", fids=[22])
-    assert len(df) == 1
-    assert df.iloc[0].geometry.has_z
+def test_read_fids_force_2d(tmp_path):
+    filename = tmp_path / "test.gpkg"

-…
-…
-…
+    # create a GPKG with 3D point values
+    expected = gp.GeoDataFrame(
+        geometry=[Point(0, 0, 0), Point(1, 1, 0)], crs="EPSG:4326"
+    )
+    write_dataframe(expected, filename)
+
+    df = read_dataframe(filename, fids=[1])
+    assert_geodataframe_equal(df, expected.iloc[:1])
+
+    df = read_dataframe(filename, force_2d=True, fids=[1])
+    assert np.array_equal(
+        df.geometry.values, shapely.force_2d(expected.iloc[:1].geometry.values)
+    )


 @pytest.mark.parametrize("skip_features", [10, 200])
@@ -638,6 +737,13 @@ def test_read_skip_features(naturalearth_lowres_all_ext, use_arrow, skip_feature
     # In .geojsonl the vertices are reordered, so normalize
     is_jsons = ext == ".geojsonl"

+    if skip_features == 200 and not use_arrow:
+        # result is an empty dataframe, so no proper dtype inference happens
+        # for the numpy object dtype arrays
+        df[["continent", "name", "iso_a3"]] = df[
+            ["continent", "name", "iso_a3"]
+        ].astype("str")
+
     assert_geodataframe_equal(
         df,
         expected,
@@ -769,7 +875,7 @@ def test_read_sql_invalid(naturalearth_lowres_all_ext, use_arrow):
     )

     with pytest.raises(
-        ValueError, match="'sql' …
+        ValueError, match="'sql' parameter cannot be combined with 'layer'"
     ):
         read_dataframe(
             naturalearth_lowres_all_ext,
@@ -906,9 +1012,20 @@ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
     assert df.iloc[0].geometry.area > area_canada


-@pytest.mark.parametrize(…
-…
-…
+@pytest.mark.parametrize(
+    "encoding, arrow",
+    [
+        ("utf-8", False),
+        pytest.param("utf-8", True, marks=requires_arrow_write_api),
+        ("cp1252", False),
+        (None, False),
+    ],
+)
+def test_write_csv_encoding(tmp_path, encoding, arrow):
+    """Test if write_dataframe uses the default encoding correctly.
+
+    Arrow only supports utf-8 encoding.
+    """
     # Write csv test file. Depending on the os this will be written in a different
     # encoding: for linux and macos this is utf-8, for windows it is cp1252.
     csv_path = tmp_path / "test.csv"
@@ -921,12 +1038,12 @@ def test_write_csv_encoding(tmp_path, encoding):
     # same encoding as above.
     df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
     csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
-    write_dataframe(df, csv_pyogrio_path, encoding=encoding)
+    write_dataframe(df, csv_pyogrio_path, encoding=encoding, use_arrow=arrow)

     # Check if the text files written both ways can be read again and give same result.
-    with open(csv_path, …
+    with open(csv_path, encoding=encoding) as csv:
         csv_str = csv.read()
-    with open(csv_pyogrio_path, …
+    with open(csv_pyogrio_path, encoding=encoding) as csv_pyogrio:
         csv_pyogrio_str = csv_pyogrio.read()
     assert csv_str == csv_pyogrio_str

@@ -939,6 +1056,48 @@ def test_write_csv_encoding(tmp_path, encoding):
     assert csv_bytes == csv_pyogrio_bytes


+@pytest.mark.parametrize(
+    "ext, fid_column, fid_param_value",
+    [
+        (".gpkg", "fid", None),
+        (".gpkg", "FID", None),
+        (".sqlite", "ogc_fid", None),
+        (".gpkg", "fid_custom", "fid_custom"),
+        (".gpkg", "FID_custom", "fid_custom"),
+        (".sqlite", "ogc_fid_custom", "ogc_fid_custom"),
+    ],
+)
+@pytest.mark.requires_arrow_write_api
+def test_write_custom_fids(tmp_path, ext, fid_column, fid_param_value, use_arrow):
+    """Test to specify FIDs to save when writing to a file.
+
+    Saving custom FIDs is only supported for formats that actually store the FID, like
+    e.g. GPKG and SQLite. The fid_column name check is case-insensitive.
+
+    Typically, GDAL supports using a custom FID column for these file formats via a
+    `FID` layer creation option, which is also tested here. If `fid_param_value` is
+    specified (not None), an `fid` parameter is passed to `write_dataframe`, causing
+    GDAL to use the column name specified for the FID.
+    """
+    input_gdf = gp.GeoDataFrame(
+        {fid_column: [5]}, geometry=[shapely.Point(0, 0)], crs="epsg:4326"
+    )
+    kwargs = {}
+    if fid_param_value is not None:
+        kwargs["fid"] = fid_param_value
+    path = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, path, use_arrow=use_arrow, **kwargs)
+
+    assert path.exists()
+    output_gdf = read_dataframe(path, fid_as_index=True, use_arrow=use_arrow)
+    output_gdf = output_gdf.reset_index()
+
+    # pyogrio always sets "fid" as index name with `fid_as_index`
+    expected_gdf = input_gdf.rename(columns={fid_column: "fid"})
+    assert_geodataframe_equal(output_gdf, expected_gdf)
+
+
 @pytest.mark.parametrize("ext", ALL_EXTS)
 @pytest.mark.requires_arrow_write_api
 def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
@@ -960,7 +1119,7 @@ def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
     if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
         assert list(geometry_types) == ["MultiPolygon"]
     else:
-        assert set(geometry_types) == …
+        assert set(geometry_types) == {"MultiPolygon", "Polygon"}

     # Coordinates are not precisely equal when written to JSON
     # dtypes do not necessarily round-trip precisely through JSON
@@ -1050,18 +1209,79 @@ def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):


 @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
+@pytest.mark.parametrize(
+    "columns, dtype",
+    [
+        ([], None),
+        (["col_int"], np.int64),
+        (["col_float"], np.float64),
+        (["col_object"], object),
+    ],
+)
 @pytest.mark.requires_arrow_write_api
-def test_write_empty_dataframe(tmp_path, ext, use_arrow):
-    …
+def test_write_empty_dataframe(tmp_path, ext, columns, dtype, use_arrow):
+    """Test writing dataframe with no rows.

+    With use_arrow, object type columns with no rows are converted to null type columns
+    by pyarrow, but null columns are not supported by GDAL. Added to test fix for #513.
+    """
+    expected = gp.GeoDataFrame(geometry=[], columns=columns, dtype=dtype, crs=4326)
     filename = tmp_path / f"test{ext}"
     write_dataframe(expected, filename, use_arrow=use_arrow)

     assert filename.exists()
+    df = read_dataframe(filename, use_arrow=use_arrow)
+
+    # Check result
+    # For older pandas versions, the index is created as Object dtype but read as
+    # RangeIndex, so don't check the index dtype in that case.
+    check_index_type = True if PANDAS_GE_20 else False
+    # with pandas 3+ and reading through arrow, we preserve the string dtype
+    # (no proper dtype inference happens for the empty numpy object dtype arrays)
+    if use_arrow and dtype is object:
+        expected["col_object"] = expected["col_object"].astype("str")
+    assert_geodataframe_equal(df, expected, check_index_type=check_index_type)
+
+
+def test_write_empty_geometry(tmp_path):
+    expected = gp.GeoDataFrame({"x": [0]}, geometry=from_wkt(["POINT EMPTY"]), crs=4326)
+    filename = tmp_path / "test.gpkg"
+
+    # Check that no warning is raised with GeoSeries.notna()
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", UserWarning)
+        if not HAS_PYPROJ:
+            warnings.filterwarnings("ignore", message="'crs' was not provided.")
+        write_dataframe(expected, filename)
+    assert filename.exists()
+
+    # Xref GH-436: round-tripping possible with GPKG but not others
     df = read_dataframe(filename)
     assert_geodataframe_equal(df, expected)


+@pytest.mark.requires_arrow_write_api
+def test_write_None_string_column(tmp_path, use_arrow):
+    """Test pandas object columns with all None values.
+
+    With use_arrow, such columns are converted to null type columns by pyarrow, but null
+    columns are not supported by GDAL. Added to test fix for #513.
+    """
+    gdf = gp.GeoDataFrame({"object_col": [None]}, geometry=[Point(0, 0)], crs=4326)
+    filename = tmp_path / "test.gpkg"
+
+    write_dataframe(gdf, filename, use_arrow=use_arrow)
+    assert filename.exists()
+
+    result_gdf = read_dataframe(filename, use_arrow=use_arrow)
+    if PANDAS_GE_30 and use_arrow:
+        assert result_gdf.object_col.dtype == "str"
+        gdf["object_col"] = gdf["object_col"].astype("str")
+    else:
+        assert result_gdf.object_col.dtype == object
+    assert_geodataframe_equal(result_gdf, gdf)
+
+
 @pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
 @pytest.mark.requires_arrow_write_api
 def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
@@ -1161,7 +1381,7 @@ def test_write_dataframe_gdal_options(
         df,
         outfilename2,
         use_arrow=use_arrow,
-        layer_options=…
+        layer_options={"spatial_index": spatial_index},
     )
     assert outfilename2.exists() is True
     index_filename2 = tmp_path / "test2.qix"
@@ -1207,7 +1427,7 @@ def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres, use
         df,
         test_no_contents_filename2,
         use_arrow=use_arrow,
-        dataset_options=…
+        dataset_options={"add_gpkg_ogr_contents": False},
     )
     assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename2)

@@ -1320,7 +1540,8 @@ def test_write_dataframe_promote_to_multi_layer_geom_type(
             ".shp",
             None,
             "Point",
-            "Could not add feature to layer at index|Error while writing batch to OGR …
+            "Could not add feature to layer at index|Error while writing batch to OGR "
+            "layer",
         ),
     ],
 )
@@ -1443,6 +1664,7 @@ def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext, use_arr
     "ignore: You will likely lose important projection information"
 )
 @pytest.mark.requires_arrow_write_api
+@requires_pyproj
 def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
     df = read_dataframe(naturalearth_lowres_all_ext)
     # project Belgium to a custom Albers Equal Area projection
@@ -1465,6 +1687,30 @@ def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
     assert df.crs.equals(expected.crs)


+@pytest.mark.parametrize("ext", [".gpkg.zip", ".shp.zip", ".shz"])
+@pytest.mark.requires_arrow_write_api
+def test_write_read_zipped_ext(tmp_path, naturalearth_lowres, ext, use_arrow):
+    """Run a basic read and write test on some extra (zipped) extensions."""
+    if ext == ".gpkg.zip" and not GDAL_GE_37:
+        pytest.skip(".gpkg.zip support requires GDAL >= 3.7")
+
+    input_gdf = read_dataframe(naturalearth_lowres)
+    output_path = tmp_path / f"test{ext}"
+
+    write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
+
+    assert output_path.exists()
+    result_gdf = read_dataframe(output_path)
+
+    geometry_types = result_gdf.geometry.type.unique()
+    if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
+        assert list(geometry_types) == ["MultiPolygon"]
+    else:
+        assert set(geometry_types) == {"MultiPolygon", "Polygon"}
+
+    assert_geodataframe_equal(result_gdf, input_gdf, check_index_type=False)
+
+
 def test_write_read_mixed_column_values(tmp_path):
     # use_arrow=True is tested separately below
     mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
@@ -1476,11 +1722,13 @@ def test_write_read_mixed_column_values(tmp_path):
     write_dataframe(test_gdf, output_path)
     output_gdf = read_dataframe(output_path)
     assert len(test_gdf) == len(output_gdf)
-    …
-    …
-    …
-    else …
-    …
+    # mixed values as object dtype are currently written as strings
+    # (but preserving nulls)
+    expected = pd.Series(
+        [str(value) if value not in (None, np.nan) else None for value in mixed_values],
+        name="mixed",
+    )
+    assert_series_equal(output_gdf["mixed"], expected)


 @requires_arrow_write_api
@@ -1513,8 +1761,24 @@ def test_write_read_null(tmp_path, use_arrow):
     assert pd.isna(result_gdf["float64"][1])
     assert pd.isna(result_gdf["float64"][2])
     assert result_gdf["object_str"][0] == "test"
-    assert result_gdf["object_str"][1] …
-    assert result_gdf["object_str"][2] …
+    assert pd.isna(result_gdf["object_str"][1])
+    assert pd.isna(result_gdf["object_str"][2])
+
+
+@pytest.mark.requires_arrow_write_api
+def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow):
+    path, _ = naturalearth_lowres_vsi
+    mem_path = f"/vsimem/{path.name}"
+
+    input = read_dataframe(path, use_arrow=use_arrow)
+    assert len(input) == 177
+
+    try:
+        write_dataframe(input, mem_path, use_arrow=use_arrow)
+        result = read_dataframe(mem_path, use_arrow=use_arrow)
+        assert len(result) == 177
+    finally:
+        vsi_unlink(mem_path)


 @pytest.mark.parametrize(
@@ -1529,7 +1793,7 @@ def test_write_read_null(tmp_path, use_arrow):
             ["2.5D MultiLineString", "MultiLineString Z"],
         ),
         (
-            "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))",
+            "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))",  # noqa: E501
             ["2.5D MultiPolygon", "MultiPolygon Z"],
         ),
         (
@@ -1572,7 +1836,7 @@ def test_write_geometry_z_types(tmp_path, wkt, geom_types, use_arrow):
                 "MultiPolygon Z",
                 False,
                 [
-                    "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))"
+                    "MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))"  # noqa: E501
                 ],
             ),
             (
@@ -1642,54 +1906,84 @@ def test_write_geometry_z_types_auto(


 @pytest.mark.parametrize(
-    "on_invalid, message",
+    "on_invalid, message, expected_wkt",
     [
         (
             "warn",
             "Invalid WKB: geometry is returned as None. IllegalArgumentException: "
-            "…
+            "Points of LinearRing do not form a closed linestring",
+            None,
         ),
-        ("raise", "…
-        ("ignore", None),
+        ("raise", "Points of LinearRing do not form a closed linestring", None),
+        ("ignore", None, None),
+        ("fix", None, "POLYGON ((0 0, 0 1, 0 0))"),
     ],
 )
-…
+@pytest.mark.filterwarnings("ignore:Non closed ring detected:RuntimeWarning")
+def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message, expected_wkt):
+    if on_invalid == "fix" and not SHAPELY_GE_21:
+        pytest.skip("on_invalid=fix not available for Shapely < 2.1")
+
     if on_invalid == "raise":
         handler = pytest.raises(shapely.errors.GEOSException, match=message)
     elif on_invalid == "warn":
         handler = pytest.warns(match=message)
-    elif on_invalid …
+    elif on_invalid in ("fix", "ignore"):
         handler = contextlib.nullcontext()
     else:
         raise ValueError(f"unknown value for on_invalid: {on_invalid}")

+    # create a GeoJSON file with an invalid exterior ring
+    invalid_geojson = """{
+        "type": "FeatureCollection",
+        "features": [
+            {
+                "type": "Feature",
+                "properties": {},
+                "geometry": {
+                    "type": "Polygon",
+                    "coordinates": [ [ [0, 0], [0, 1] ] ]
+                }
+            }
+        ]
+    }"""
+
+    filename = tmp_path / "test.geojson"
+    with open(filename, "w") as f:
+        _ = f.write(invalid_geojson)
+
     with handler:
         df = read_dataframe(
-            …
+            filename,
             use_arrow=use_arrow,
             on_invalid=on_invalid,
         )
-        …
+        if expected_wkt is None:
+            assert df.geometry.iloc[0] is None
+        else:
+            assert df.geometry.iloc[0].wkt == expected_wkt


-def test_read_multisurface(…
+def test_read_multisurface(multisurface_file, use_arrow):
     if use_arrow:
+        # TODO: revisit once https://github.com/geopandas/pyogrio/issues/478
+        # is resolved.
+        pytest.skip("Shapely + GEOS 3.13 crashes in from_wkb for this case")
+
         with pytest.raises(shapely.errors.GEOSException):
             # TODO(Arrow)
             # shapely fails parsing the WKB
-            read_dataframe(…
+            read_dataframe(multisurface_file, use_arrow=True)
     else:
-        df = read_dataframe(…
+        df = read_dataframe(multisurface_file)

     # MultiSurface should be converted to MultiPolygon
     assert df.geometry.type.tolist() == ["MultiPolygon"]


-def test_read_dataset_kwargs(…
-    filename = data_dir / "test_nested.geojson"
-…
+def test_read_dataset_kwargs(nested_geojson_file, use_arrow):
     # by default, nested data are not flattened
-    df = read_dataframe(…
+    df = read_dataframe(nested_geojson_file, use_arrow=use_arrow)

     expected = gp.GeoDataFrame(
         {
@@ -1699,10 +1993,16 @@ def test_read_dataset_kwargs(data_dir, use_arrow):
         geometry=[shapely.Point(0, 0)],
         crs="EPSG:4326",
     )
+    if GDAL_GE_311 and use_arrow:
+        # GDAL 3.11 started to use json extension type, which is not yet handled
+        # correctly in the arrow->pandas conversion (using object instead of str dtype)
+        expected["intermediate_level"] = expected["intermediate_level"].astype(object)

     assert_geodataframe_equal(df, expected)

-    df = read_dataframe(…
+    df = read_dataframe(
+        nested_geojson_file, use_arrow=use_arrow, FLATTEN_NESTED_ATTRIBUTES="YES"
+    )

     expected = gp.GeoDataFrame(
         {
@@ -1742,7 +2042,7 @@ def test_write_nullable_dtypes(tmp_path, use_arrow):
     expected["col2"] = expected["col2"].astype("float64")
     expected["col3"] = expected["col3"].astype("float32")
     expected["col4"] = expected["col4"].astype("float64")
-    expected["col5"] = expected["col5"].astype(…
+    expected["col5"] = expected["col5"].astype("str")
     expected.loc[1, "col5"] = None  # pandas converts to pd.NA on line above
     assert_geodataframe_equal(output_gdf, expected)

@@ -1904,6 +2204,9 @@ def test_write_memory(naturalearth_lowres, driver):
         check_dtype=not is_json,
     )

+    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+

 def test_write_memory_driver_required(naturalearth_lowres):
     df = read_dataframe(naturalearth_lowres)
@@ -1916,6 +2219,9 @@ def test_write_memory_driver_required(naturalearth_lowres):
     ):
         write_dataframe(df.head(1), buffer, driver=None, layer="test")

+    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+

 @pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
 def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
@@ -1931,6 +2237,9 @@ def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
     ):
         write_dataframe(df, buffer, driver=driver, layer="test")

+    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+

 @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
 def test_write_memory_append_unsupported(naturalearth_lowres, driver):
@@ -1943,6 +2252,9 @@ def test_write_memory_append_unsupported(naturalearth_lowres, driver):
     ):
         write_dataframe(df.head(1), buffer, driver=driver, layer="test", append=True)

+    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+

 def test_write_memory_existing_unsupported(naturalearth_lowres):
     df = read_dataframe(naturalearth_lowres)
@@ -1954,6 +2266,33 @@ def test_write_memory_existing_unsupported(naturalearth_lowres):
     ):
         write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test")

+    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+
+
+def test_write_open_file_handle(tmp_path, naturalearth_lowres):
+    """Verify that writing to an open file handle is not currently supported"""
+
+    df = read_dataframe(naturalearth_lowres)
+
+    # verify it fails for regular file handle
+    with pytest.raises(
+        NotImplementedError, match="writing to an open file handle is not yet supported"
+    ):
+        with open(tmp_path / "test.geojson", "wb") as f:
+            write_dataframe(df.head(1), f)
+
+    # verify it fails for ZipFile
+    with pytest.raises(
+        NotImplementedError, match="writing to an open file handle is not yet supported"
+    ):
+        with ZipFile(tmp_path / "test.geojson.zip", "w") as z:
+            with z.open("test.geojson", "w") as f:
+                write_dataframe(df.head(1), f)
+
+    # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/.
+    assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == []
+

 @pytest.mark.parametrize("ext", ["gpkg", "geojson"])
 def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
@@ -2026,7 +2365,10 @@ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):

     if use_arrow:
         # pyarrow cannot decode column name with incorrect encoding
-        with pytest.raises(…
+        with pytest.raises(
+            DataSourceError,
+            match="The file being read is not encoded in UTF-8; please use_arrow=False",
+        ):
             read_dataframe(output_path, use_arrow=True)
     else:
         bad = read_dataframe(output_path, use_arrow=False)
@@ -2045,7 +2387,8 @@ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):


 def test_encoding_read_option_collision_shapefile(naturalearth_lowres, use_arrow):
-    """Providing both encoding parameter and ENCODING open option …
+    """Providing both encoding parameter and ENCODING open option
+    (even if blank) is not allowed."""

     with pytest.raises(
         ValueError, match='cannot provide both encoding parameter and "ENCODING" option'
@@ -2056,7 +2399,8 @@ def test_encoding_read_option_collision_shapefile(naturalearth_lowres, use_arrow


 def test_encoding_write_layer_option_collision_shapefile(tmp_path, encoded_text):
-    """Providing both encoding parameter and ENCODING layer creation option …
+    """Providing both encoding parameter and ENCODING layer creation option
+    (even if blank) is not allowed."""
     encoding, text = encoded_text

     output_path = tmp_path / "test.shp"
@@ -2064,7 +2408,10 @@ def test_encoding_write_layer_option_collision_shapefile(tmp_path, encoded_text)

     with pytest.raises(
         ValueError,
-        match=…
+        match=(
+            'cannot provide both encoding parameter and "ENCODING" layer creation '
+            "option"
+        ),
     ):
         write_dataframe(
             df, output_path, encoding=encoding, layer_options={"ENCODING": ""}
@@ -2102,7 +2449,8 @@ def test_non_utf8_encoding_shapefile_sql(tmp_path, use_arrow):

 @pytest.mark.requires_arrow_write_api
 def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
-    # confirm KML coordinates are written in lon, lat order even if CRS axis …
+    # confirm KML coordinates are written in lon, lat order even if CRS axis
+    # specifies otherwise
     points = [Point(10, 20), Point(30, 40), Point(50, 60)]
     gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
     output_path = tmp_path / "test.kml"
@@ -2117,7 +2465,7 @@ def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
     if "LIBKML" in list_drivers():
         # test appending to the existing file only if LIBKML is available
         # as it appears to fall back on LIBKML driver when appending.
-        points_append = [Point(…
+        points_append = [Point(7, 8), Point(9, 10), Point(11, 12)]
         gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")

         write_dataframe(