pyogrio 0.7.2-cp38-cp38-win_amd64.whl → 0.9.0-cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyogrio/__init__.py +12 -7
- pyogrio/_compat.py +6 -1
- pyogrio/_err.c +855 -321
- pyogrio/_err.cp38-win_amd64.pyd +0 -0
- pyogrio/_err.pyx +7 -3
- pyogrio/_geometry.c +134 -75
- pyogrio/_geometry.cp38-win_amd64.pyd +0 -0
- pyogrio/_io.c +28947 -23126
- pyogrio/_io.cp38-win_amd64.pyd +0 -0
- pyogrio/_io.pyx +904 -242
- pyogrio/_ogr.c +1317 -1640
- pyogrio/_ogr.cp38-win_amd64.pyd +0 -0
- pyogrio/_ogr.pxd +69 -13
- pyogrio/_ogr.pyx +8 -24
- pyogrio/_version.py +3 -3
- pyogrio/_vsi.c +6815 -0
- pyogrio/_vsi.cp38-win_amd64.pyd +0 -0
- pyogrio/_vsi.pxd +4 -0
- pyogrio/_vsi.pyx +140 -0
- pyogrio/core.py +43 -44
- pyogrio/gdal_data/GDAL-targets-release.cmake +1 -1
- pyogrio/gdal_data/GDAL-targets.cmake +10 -6
- pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
- pyogrio/gdal_data/gdalinfo_output.schema.json +2 -0
- pyogrio/gdal_data/gdalvrt.xsd +163 -0
- pyogrio/gdal_data/ogrinfo_output.schema.json +12 -1
- pyogrio/gdal_data/vcpkg.spdx.json +23 -23
- pyogrio/gdal_data/vcpkg_abi_info.txt +29 -28
- pyogrio/geopandas.py +140 -34
- pyogrio/proj_data/ITRF2008 +2 -2
- pyogrio/proj_data/proj-config-version.cmake +2 -2
- pyogrio/proj_data/proj-config.cmake +2 -1
- pyogrio/proj_data/proj-targets-release.cmake +0 -1
- pyogrio/proj_data/proj-targets.cmake +10 -6
- pyogrio/proj_data/proj.db +0 -0
- pyogrio/proj_data/proj4-targets-release.cmake +0 -1
- pyogrio/proj_data/proj4-targets.cmake +10 -6
- pyogrio/proj_data/vcpkg.spdx.json +21 -43
- pyogrio/proj_data/vcpkg_abi_info.txt +16 -17
- pyogrio/raw.py +438 -116
- pyogrio/tests/conftest.py +75 -6
- pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
- pyogrio/tests/test_arrow.py +841 -7
- pyogrio/tests/test_core.py +99 -7
- pyogrio/tests/test_geopandas_io.py +827 -121
- pyogrio/tests/test_path.py +23 -3
- pyogrio/tests/test_raw_io.py +276 -50
- pyogrio/util.py +39 -19
- pyogrio-0.9.0.dist-info/DELVEWHEEL +2 -0
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/METADATA +2 -2
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/RECORD +74 -69
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/WHEEL +1 -1
- pyogrio.libs/.load-order-pyogrio-0.9.0 +19 -0
- pyogrio.libs/Lerc-5e4d8cbeeabca06f95e2270792304dc3.dll +0 -0
- pyogrio.libs/{gdal-c3b1d8f66682071d0cd26d86e4182013.dll → gdal-b434963605a006e01c486c0df6dea4e0.dll} +0 -0
- pyogrio.libs/geos-f0622d0794b81c937a851b2e6fa9b712.dll +0 -0
- pyogrio.libs/geos_c-0e16bf70612fc3301d077b9d863a3fdb.dll +0 -0
- pyogrio.libs/{geotiff-e43cdab688866b59f8800cfcde836d16.dll → geotiff-772e7c705fb15ddf91b432adb4eb1f6c.dll} +0 -0
- pyogrio.libs/iconv-2-8fcc23ddc6f096c45871011b6e008b44.dll +0 -0
- pyogrio.libs/{jpeg62-567ab743ac805dfb57fe3867ba5788a4.dll → jpeg62-2f9b7af22d78338e8f0be0058503dc35.dll} +0 -0
- pyogrio.libs/json-c-e52a077545e4057de42beb4948289b41.dll +0 -0
- pyogrio.libs/libcurl-bc81cd8afe15b10c0821b181b6af8bd0.dll +0 -0
- pyogrio.libs/libexpat-fbe03ca8917dfda776562d4338b289b8.dll +0 -0
- pyogrio.libs/{liblzma-de7f4770d4e3715acd031ca93883f10c.dll → liblzma-6b36f24d54d3dd45f274a2aebef81085.dll} +0 -0
- pyogrio.libs/libpng16-13928571ad910705eae8d7dd8eef8b11.dll +0 -0
- pyogrio.libs/{msvcp140-83b6a1a2fa8b1735a358b2fe13cabe4e.dll → msvcp140-46db46e967c8db2cb7a20fc75872a57e.dll} +0 -0
- pyogrio.libs/proj-8a30239ef2dfc3b9dd2bb48e8abb330f.dll +0 -0
- pyogrio.libs/{qhull_r-99ae8a526357acc44b162cb4df2c3bb6.dll → qhull_r-c45abde5d0c92faf723cc2942138af77.dll} +0 -0
- pyogrio.libs/sqlite3-df30c3cf230727e23c43c40126a530f7.dll +0 -0
- pyogrio.libs/{tiff-7c2d4b204ec2db46c81f6a597895c2f7.dll → tiff-43630f30487a9015213475ae86ed3fa3.dll} +0 -0
- pyogrio.libs/vcruntime140_1-8f7e93381c4b3d1e411993c0bae01646.dll +0 -0
- pyogrio.libs/{zlib1-824de9299616f0908aeeb9441a084848.dll → zlib1-e1272810861a13dd8d6cff3beac47f17.dll} +0 -0
- pyogrio/tests/win32.py +0 -86
- pyogrio-0.7.2.dist-info/DELVEWHEEL +0 -2
- pyogrio.libs/.load-order-pyogrio-0.7.2 +0 -18
- pyogrio.libs/Lerc-d5afc4101deffe7de21241ccd4d562f6.dll +0 -0
- pyogrio.libs/geos-1c764a1384537a0ad2995e83d23e8642.dll +0 -0
- pyogrio.libs/geos_c-0d7dfdcee49efa8df585e2fb993157aa.dll +0 -0
- pyogrio.libs/json-c-36c91e30c4410d41c22b2010c31183e3.dll +0 -0
- pyogrio.libs/libcurl-ebcc8c18195071a90e59f818902e10c6.dll +0 -0
- pyogrio.libs/libexpat-345379c9c11632130d8c383cbacde1a6.dll +0 -0
- pyogrio.libs/libpng16-2c30e6846653c47ef2ff9d7dec3338ba.dll +0 -0
- pyogrio.libs/proj-98758c96a6cb682b5cec7e8dc5e29a50.dll +0 -0
- pyogrio.libs/sqlite3-327ed7b38bfd91fb4a17544960e055e9.dll +0 -0
- pyogrio.libs/vcruntime140_1-d1a1506707e0c0a26950a60c5f97ad99.dll +0 -0
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/LICENSE +0 -0
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/top_level.txt +0 -0
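
The bulk of the diff below updates pyogrio/tests/test_geopandas_io.py, whose tests now pass the new use_arrow keyword through read_dataframe and write_dataframe. As an illustrative sketch only (not part of the diff: the file name and data here are invented, and, per the autouse fixture visible below, the Arrow write path assumes GDAL >= 3.8 with pyarrow installed), a round trip with the new keyword looks roughly like:

    # Hypothetical round-trip sketch based on the tests in this diff; assumes
    # pyogrio 0.9.0, pyarrow installed, and GDAL >= 3.8 for the Arrow write API.
    import geopandas as gp
    import shapely
    from pyogrio.geopandas import read_dataframe, write_dataframe

    gdf = gp.GeoDataFrame(
        {"name": ["a", "b"]},
        geometry=[shapely.Point(0, 0), shapely.Point(1, 1)],
        crs="EPSG:4326",
    )
    write_dataframe(gdf, "example.gpkg", use_arrow=True)  # Arrow write path
    result = read_dataframe("example.gpkg", use_arrow=True)
    assert len(result) == 2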
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
from datetime import datetime
|
|
3
|
-
import
|
|
3
|
+
from io import BytesIO
|
|
4
|
+
import locale
|
|
5
|
+
|
|
4
6
|
import numpy as np
|
|
5
7
|
import pytest
|
|
6
8
|
|
|
7
|
-
from pyogrio import list_layers, read_info, __gdal_version__
|
|
9
|
+
from pyogrio import list_layers, list_drivers, read_info, __gdal_version__
|
|
8
10
|
from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
|
|
9
11
|
from pyogrio.geopandas import read_dataframe, write_dataframe, PANDAS_GE_20
|
|
10
12
|
from pyogrio.raw import (
|
|
@@ -14,10 +16,11 @@ from pyogrio.raw import (
|
|
|
14
16
|
from pyogrio.tests.conftest import (
|
|
15
17
|
ALL_EXTS,
|
|
16
18
|
DRIVERS,
|
|
17
|
-
|
|
19
|
+
requires_pyarrow_api,
|
|
20
|
+
requires_arrow_write_api,
|
|
18
21
|
requires_gdal_geos,
|
|
19
22
|
)
|
|
20
|
-
from pyogrio._compat import PANDAS_GE_15
|
|
23
|
+
from pyogrio._compat import PANDAS_GE_15, HAS_ARROW_WRITE_API
|
|
21
24
|
|
|
22
25
|
try:
|
|
23
26
|
import pandas as pd
|
|
@@ -45,13 +48,30 @@ pytest.importorskip("geopandas")
|
|
|
45
48
|
scope="session",
|
|
46
49
|
params=[
|
|
47
50
|
False,
|
|
48
|
-
pytest.param(True, marks=
|
|
51
|
+
pytest.param(True, marks=requires_pyarrow_api),
|
|
49
52
|
],
|
|
50
53
|
)
|
|
51
54
|
def use_arrow(request):
|
|
52
55
|
return request.param
|
|
53
56
|
|
|
54
57
|
|
|
58
|
+
@pytest.fixture(autouse=True)
|
|
59
|
+
def skip_if_no_arrow_write_api(request):
|
|
60
|
+
# automatically skip tests with use_arrow=True and that require Arrow write
|
|
61
|
+
# API (marked with `@pytest.mark.requires_arrow_write_api`) if it is not available
|
|
62
|
+
use_arrow = (
|
|
63
|
+
request.getfixturevalue("use_arrow")
|
|
64
|
+
if "use_arrow" in request.fixturenames
|
|
65
|
+
else False
|
|
66
|
+
)
|
|
67
|
+
if (
|
|
68
|
+
use_arrow
|
|
69
|
+
and not HAS_ARROW_WRITE_API
|
|
70
|
+
and request.node.get_closest_marker("requires_arrow_write_api")
|
|
71
|
+
):
|
|
72
|
+
pytest.skip("GDAL>=3.8 required for Arrow write API")
|
|
73
|
+
|
|
74
|
+
|
|
55
75
|
def spatialite_available(path):
|
|
56
76
|
try:
|
|
57
77
|
_ = read_dataframe(
|
|
@@ -62,6 +82,45 @@ def spatialite_available(path):
|
|
|
62
82
|
return False
|
|
63
83
|
|
|
64
84
|
|
|
85
|
+
@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
|
|
86
|
+
def test_read_csv_encoding(tmp_path, encoding):
|
|
87
|
+
# Write csv test file. Depending on the os this will be written in a different
|
|
88
|
+
# encoding: for linux and macos this is utf-8, for windows it is cp1252.
|
|
89
|
+
csv_path = tmp_path / "test.csv"
|
|
90
|
+
with open(csv_path, "w", encoding=encoding) as csv:
|
|
91
|
+
csv.write("näme,city\n")
|
|
92
|
+
csv.write("Wilhelm Röntgen,Zürich\n")
|
|
93
|
+
|
|
94
|
+
# Read csv. The data should be read with the same default encoding as the csv file
|
|
95
|
+
# was written in, but should have been converted to utf-8 in the dataframe returned.
|
|
96
|
+
# Hence, the asserts below, with strings in utf-8, be OK.
|
|
97
|
+
df = read_dataframe(csv_path, encoding=encoding)
|
|
98
|
+
|
|
99
|
+
assert len(df) == 1
|
|
100
|
+
assert df.columns.tolist() == ["näme", "city"]
|
|
101
|
+
assert df.city.tolist() == ["Zürich"]
|
|
102
|
+
assert df.näme.tolist() == ["Wilhelm Röntgen"]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@pytest.mark.skipif(
|
|
106
|
+
locale.getpreferredencoding().upper() == "UTF-8",
|
|
107
|
+
reason="test requires non-UTF-8 default platform",
|
|
108
|
+
)
|
|
109
|
+
def test_read_csv_platform_encoding(tmp_path):
|
|
110
|
+
"""verify that read defaults to platform encoding; only works on Windows (CP1252)"""
|
|
111
|
+
csv_path = tmp_path / "test.csv"
|
|
112
|
+
with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
|
|
113
|
+
csv.write("näme,city\n")
|
|
114
|
+
csv.write("Wilhelm Röntgen,Zürich\n")
|
|
115
|
+
|
|
116
|
+
df = read_dataframe(csv_path)
|
|
117
|
+
|
|
118
|
+
assert len(df) == 1
|
|
119
|
+
assert df.columns.tolist() == ["näme", "city"]
|
|
120
|
+
assert df.city.tolist() == ["Zürich"]
|
|
121
|
+
assert df.näme.tolist() == ["Wilhelm Röntgen"]
|
|
122
|
+
|
|
123
|
+
|
|
65
124
|
def test_read_dataframe(naturalearth_lowres_all_ext):
|
|
66
125
|
df = read_dataframe(naturalearth_lowres_all_ext)
|
|
67
126
|
|
|
@@ -77,8 +136,8 @@ def test_read_dataframe(naturalearth_lowres_all_ext):
|
|
|
77
136
|
]
|
|
78
137
|
|
|
79
138
|
|
|
80
|
-
def test_read_dataframe_vsi(naturalearth_lowres_vsi):
|
|
81
|
-
df = read_dataframe(naturalearth_lowres_vsi[1])
|
|
139
|
+
def test_read_dataframe_vsi(naturalearth_lowres_vsi, use_arrow):
|
|
140
|
+
df = read_dataframe(naturalearth_lowres_vsi[1], use_arrow=use_arrow)
|
|
82
141
|
assert len(df) == 177
|
|
83
142
|
|
|
84
143
|
|
|
@@ -154,6 +213,7 @@ def test_read_force_2d(test_fgdb_vsi, use_arrow):
|
|
|
154
213
|
|
|
155
214
|
|
|
156
215
|
@pytest.mark.filterwarnings("ignore: Measured")
|
|
216
|
+
@pytest.mark.filterwarnings("ignore: More than one layer found in")
|
|
157
217
|
def test_read_layer(test_fgdb_vsi, use_arrow):
|
|
158
218
|
layers = list_layers(test_fgdb_vsi)
|
|
159
219
|
kwargs = {"use_arrow": use_arrow, "read_geometry": False, "max_features": 1}
|
|
@@ -186,8 +246,13 @@ def test_read_datetime(test_fgdb_vsi, use_arrow):
|
|
|
186
246
|
assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
|
|
187
247
|
|
|
188
248
|
|
|
189
|
-
|
|
249
|
+
@pytest.mark.filterwarnings("ignore: Non-conformant content for record 1 in column ")
|
|
250
|
+
@pytest.mark.requires_arrow_write_api
|
|
251
|
+
def test_read_datetime_tz(test_datetime_tz, tmp_path, use_arrow):
|
|
190
252
|
df = read_dataframe(test_datetime_tz)
|
|
253
|
+
# Make the index non-consecutive to test this case as well. Added for issue
|
|
254
|
+
# https://github.com/geopandas/pyogrio/issues/324
|
|
255
|
+
df = df.set_index(np.array([0, 2]))
|
|
191
256
|
raw_expected = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
|
|
192
257
|
|
|
193
258
|
if PANDAS_GE_20:
|
|
@@ -195,15 +260,22 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
|
|
|
195
260
|
else:
|
|
196
261
|
expected = pd.to_datetime(raw_expected)
|
|
197
262
|
expected = pd.Series(expected, name="datetime_col")
|
|
198
|
-
assert_series_equal(df.datetime_col, expected)
|
|
263
|
+
assert_series_equal(df.datetime_col, expected, check_index=False)
|
|
199
264
|
# test write and read round trips
|
|
200
265
|
fpath = tmp_path / "test.gpkg"
|
|
201
|
-
write_dataframe(df, fpath)
|
|
202
|
-
df_read = read_dataframe(fpath)
|
|
266
|
+
write_dataframe(df, fpath, use_arrow=use_arrow)
|
|
267
|
+
df_read = read_dataframe(fpath, use_arrow=use_arrow)
|
|
268
|
+
if use_arrow:
|
|
269
|
+
# with Arrow, the datetimes are always read as UTC
|
|
270
|
+
expected = expected.dt.tz_convert("UTC")
|
|
203
271
|
assert_series_equal(df_read.datetime_col, expected)
|
|
204
272
|
|
|
205
273
|
|
|
206
|
-
|
|
274
|
+
@pytest.mark.filterwarnings(
|
|
275
|
+
"ignore: Non-conformant content for record 1 in column dates"
|
|
276
|
+
)
|
|
277
|
+
@pytest.mark.requires_arrow_write_api
|
|
278
|
+
def test_write_datetime_mixed_offset(tmp_path, use_arrow):
|
|
207
279
|
# Australian Summer Time AEDT (GMT+11), Standard Time AEST (GMT+10)
|
|
208
280
|
dates = ["2023-01-01 11:00:01.111", "2023-06-01 10:00:01.111"]
|
|
209
281
|
naive_col = pd.Series(pd.to_datetime(dates), name="dates")
|
|
@@ -217,14 +289,18 @@ def test_write_datetime_mixed_offset(tmp_path):
|
|
|
217
289
|
crs="EPSG:4326",
|
|
218
290
|
)
|
|
219
291
|
fpath = tmp_path / "test.gpkg"
|
|
220
|
-
write_dataframe(df, fpath)
|
|
221
|
-
result = read_dataframe(fpath)
|
|
292
|
+
write_dataframe(df, fpath, use_arrow=use_arrow)
|
|
293
|
+
result = read_dataframe(fpath, use_arrow=use_arrow)
|
|
222
294
|
# GDAL tz only encodes offsets, not timezones
|
|
223
295
|
# check multiple offsets are read as utc datetime instead of string values
|
|
224
296
|
assert_series_equal(result["dates"], utc_col)
|
|
225
297
|
|
|
226
298
|
|
|
227
|
-
|
|
299
|
+
@pytest.mark.filterwarnings(
|
|
300
|
+
"ignore: Non-conformant content for record 1 in column dates"
|
|
301
|
+
)
|
|
302
|
+
@pytest.mark.requires_arrow_write_api
|
|
303
|
+
def test_read_write_datetime_tz_with_nulls(tmp_path, use_arrow):
|
|
228
304
|
dates_raw = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00", pd.NaT]
|
|
229
305
|
if PANDAS_GE_20:
|
|
230
306
|
dates = pd.to_datetime(dates_raw, format="ISO8601").as_unit("ms")
|
|
@@ -235,13 +311,18 @@ def test_read_write_datetime_tz_with_nulls(tmp_path):
|
|
|
235
311
|
crs="EPSG:4326",
|
|
236
312
|
)
|
|
237
313
|
fpath = tmp_path / "test.gpkg"
|
|
238
|
-
write_dataframe(df, fpath)
|
|
239
|
-
result = read_dataframe(fpath)
|
|
314
|
+
write_dataframe(df, fpath, use_arrow=use_arrow)
|
|
315
|
+
result = read_dataframe(fpath, use_arrow=use_arrow)
|
|
316
|
+
if use_arrow:
|
|
317
|
+
# with Arrow, the datetimes are always read as UTC
|
|
318
|
+
df["dates"] = df["dates"].dt.tz_convert("UTC")
|
|
240
319
|
assert_geodataframe_equal(df, result)
|
|
241
320
|
|
|
242
321
|
|
|
243
322
|
def test_read_null_values(test_fgdb_vsi, use_arrow):
|
|
244
|
-
df = read_dataframe(
|
|
323
|
+
df = read_dataframe(
|
|
324
|
+
test_fgdb_vsi, layer="basetable_2", use_arrow=use_arrow, read_geometry=False
|
|
325
|
+
)
|
|
245
326
|
|
|
246
327
|
# make sure that Null values are preserved
|
|
247
328
|
assert df.SEGMENT_NAME.isnull().max()
|
|
@@ -331,6 +412,21 @@ def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
|
|
|
331
412
|
)
|
|
332
413
|
|
|
333
414
|
|
|
415
|
+
def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
|
|
416
|
+
# column included in where is not also included in list of columns, which means
|
|
417
|
+
# GDAL will return no features
|
|
418
|
+
# NOTE: this behavior is inconsistent across drivers so only shapefiles are
|
|
419
|
+
# tested for this
|
|
420
|
+
df = read_dataframe(
|
|
421
|
+
naturalearth_lowres,
|
|
422
|
+
where=""" "iso_a3" = 'CAN' """,
|
|
423
|
+
columns=["name"],
|
|
424
|
+
use_arrow=use_arrow,
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
assert len(df) == 0
|
|
428
|
+
|
|
429
|
+
|
|
334
430
|
@pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
|
|
335
431
|
def test_read_bbox_invalid(naturalearth_lowres_all_ext, bbox, use_arrow):
|
|
336
432
|
with pytest.raises(ValueError, match="Invalid bbox"):
|
|
@@ -349,7 +445,7 @@ def test_read_bbox(naturalearth_lowres_all_ext, use_arrow, bbox, expected):
|
|
|
349
445
|
if (
|
|
350
446
|
use_arrow
|
|
351
447
|
and __gdal_version__ < (3, 8, 0)
|
|
352
|
-
and
|
|
448
|
+
and naturalearth_lowres_all_ext.suffix == ".gpkg"
|
|
353
449
|
):
|
|
354
450
|
pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")
|
|
355
451
|
|
|
@@ -438,7 +534,7 @@ def test_read_mask(
|
|
|
438
534
|
if (
|
|
439
535
|
use_arrow
|
|
440
536
|
and __gdal_version__ < (3, 8, 0)
|
|
441
|
-
and
|
|
537
|
+
and naturalearth_lowres_all_ext.suffix == ".gpkg"
|
|
442
538
|
):
|
|
443
539
|
pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")
|
|
444
540
|
|
|
@@ -470,14 +566,45 @@ def test_read_mask_where(naturalearth_lowres_all_ext, use_arrow):
|
|
|
470
566
|
assert np.array_equal(df.iso_a3, ["CAN"])
|
|
471
567
|
|
|
472
568
|
|
|
473
|
-
|
|
569
|
+
@pytest.mark.parametrize("fids", [[1, 5, 10], np.array([1, 5, 10], dtype=np.int64)])
|
|
570
|
+
def test_read_fids(naturalearth_lowres_all_ext, fids, use_arrow):
|
|
474
571
|
# ensure keyword is properly passed through
|
|
475
|
-
|
|
476
|
-
|
|
572
|
+
df = read_dataframe(
|
|
573
|
+
naturalearth_lowres_all_ext, fids=fids, fid_as_index=True, use_arrow=use_arrow
|
|
574
|
+
)
|
|
477
575
|
assert len(df) == 3
|
|
478
576
|
assert np.array_equal(fids, df.index.values)
|
|
479
577
|
|
|
480
578
|
|
|
579
|
+
@requires_pyarrow_api
|
|
580
|
+
def test_read_fids_arrow_max_exception(naturalearth_lowres):
|
|
581
|
+
# Maximum number at time of writing is 4997 for "OGRSQL". For e.g. for SQLite based
|
|
582
|
+
# formats like Geopackage, there is no limit.
|
|
583
|
+
nb_fids = 4998
|
|
584
|
+
fids = range(nb_fids)
|
|
585
|
+
with pytest.raises(ValueError, match=f"error applying filter for {nb_fids} fids"):
|
|
586
|
+
_ = read_dataframe(naturalearth_lowres, fids=fids, use_arrow=True)
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
@requires_pyarrow_api
|
|
590
|
+
@pytest.mark.skipif(
|
|
591
|
+
__gdal_version__ >= (3, 8, 0), reason="GDAL >= 3.8.0 does not need to warn"
|
|
592
|
+
)
|
|
593
|
+
def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext):
|
|
594
|
+
# A warning should be given for old GDAL versions, except for some file formats.
|
|
595
|
+
if naturalearth_lowres_all_ext.suffix not in [".gpkg", ".geojson"]:
|
|
596
|
+
handler = pytest.warns(
|
|
597
|
+
UserWarning,
|
|
598
|
+
match="Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow",
|
|
599
|
+
)
|
|
600
|
+
else:
|
|
601
|
+
handler = contextlib.nullcontext()
|
|
602
|
+
|
|
603
|
+
with handler:
|
|
604
|
+
df = read_dataframe(naturalearth_lowres_all_ext, fids=[22], use_arrow=True)
|
|
605
|
+
assert len(df) == 1
|
|
606
|
+
|
|
607
|
+
|
|
481
608
|
def test_read_fids_force_2d(test_fgdb_vsi):
|
|
482
609
|
with pytest.warns(
|
|
483
610
|
UserWarning, match=r"Measured \(M\) geometry types are not supported"
|
|
@@ -573,13 +700,17 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
|
573
700
|
# The geometry column cannot be specified when using the
|
|
574
701
|
# default OGRSQL dialect but is returned nonetheless, so 4 columns.
|
|
575
702
|
sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
|
|
576
|
-
df = read_dataframe(
|
|
703
|
+
df = read_dataframe(
|
|
704
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
705
|
+
)
|
|
577
706
|
assert len(df.columns) == 4
|
|
578
707
|
assert len(df) == 177
|
|
579
708
|
|
|
580
709
|
# Should return single row
|
|
581
710
|
sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
|
|
582
|
-
df = read_dataframe(
|
|
711
|
+
df = read_dataframe(
|
|
712
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
713
|
+
)
|
|
583
714
|
assert len(df) == 1
|
|
584
715
|
assert len(df.columns) == 6
|
|
585
716
|
assert df.iloc[0].iso_a3 == "CAN"
|
|
@@ -587,7 +718,9 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
|
587
718
|
sql = """SELECT *
|
|
588
719
|
FROM naturalearth_lowres
|
|
589
720
|
WHERE iso_a3 IN ('CAN', 'USA', 'MEX')"""
|
|
590
|
-
df = read_dataframe(
|
|
721
|
+
df = read_dataframe(
|
|
722
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
723
|
+
)
|
|
591
724
|
assert len(df.columns) == 6
|
|
592
725
|
assert len(df) == 3
|
|
593
726
|
assert df.iso_a3.tolist() == ["CAN", "USA", "MEX"]
|
|
@@ -596,7 +729,9 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
|
596
729
|
FROM naturalearth_lowres
|
|
597
730
|
WHERE iso_a3 IN ('CAN', 'USA', 'MEX')
|
|
598
731
|
ORDER BY name"""
|
|
599
|
-
df = read_dataframe(
|
|
732
|
+
df = read_dataframe(
|
|
733
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
734
|
+
)
|
|
600
735
|
assert len(df.columns) == 6
|
|
601
736
|
assert len(df) == 3
|
|
602
737
|
assert df.iso_a3.tolist() == ["CAN", "MEX", "USA"]
|
|
@@ -605,7 +740,9 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
|
605
740
|
sql = """SELECT *
|
|
606
741
|
FROM naturalearth_lowres
|
|
607
742
|
WHERE POP_EST >= 10000000 AND POP_EST < 100000000"""
|
|
608
|
-
df = read_dataframe(
|
|
743
|
+
df = read_dataframe(
|
|
744
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
745
|
+
)
|
|
609
746
|
assert len(df) == 75
|
|
610
747
|
assert len(df.columns) == 6
|
|
611
748
|
assert df.pop_est.min() >= 10000000
|
|
@@ -613,25 +750,36 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
|
613
750
|
|
|
614
751
|
# Should match no items.
|
|
615
752
|
sql = "SELECT * FROM naturalearth_lowres WHERE ISO_A3 = 'INVALID'"
|
|
616
|
-
df = read_dataframe(
|
|
753
|
+
df = read_dataframe(
|
|
754
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
755
|
+
)
|
|
617
756
|
assert len(df) == 0
|
|
618
757
|
|
|
619
758
|
|
|
620
|
-
def test_read_sql_invalid(naturalearth_lowres_all_ext):
|
|
759
|
+
def test_read_sql_invalid(naturalearth_lowres_all_ext, use_arrow):
|
|
621
760
|
if naturalearth_lowres_all_ext.suffix == ".gpkg":
|
|
622
761
|
with pytest.raises(Exception, match="In ExecuteSQL().*"):
|
|
623
|
-
read_dataframe(
|
|
762
|
+
read_dataframe(
|
|
763
|
+
naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
|
|
764
|
+
)
|
|
624
765
|
else:
|
|
625
766
|
with pytest.raises(Exception, match="SQL Expression Parsing Error"):
|
|
626
|
-
read_dataframe(
|
|
767
|
+
read_dataframe(
|
|
768
|
+
naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
|
|
769
|
+
)
|
|
627
770
|
|
|
628
771
|
with pytest.raises(
|
|
629
772
|
ValueError, match="'sql' paramater cannot be combined with 'layer'"
|
|
630
773
|
):
|
|
631
|
-
read_dataframe(
|
|
774
|
+
read_dataframe(
|
|
775
|
+
naturalearth_lowres_all_ext,
|
|
776
|
+
sql="whatever",
|
|
777
|
+
layer="invalid",
|
|
778
|
+
use_arrow=use_arrow,
|
|
779
|
+
)
|
|
632
780
|
|
|
633
781
|
|
|
634
|
-
def test_read_sql_columns_where(naturalearth_lowres_all_ext):
|
|
782
|
+
def test_read_sql_columns_where(naturalearth_lowres_all_ext, use_arrow):
|
|
635
783
|
sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
|
|
636
784
|
df = read_dataframe(
|
|
637
785
|
naturalearth_lowres_all_ext,
|
|
@@ -639,13 +787,14 @@ def test_read_sql_columns_where(naturalearth_lowres_all_ext):
|
|
|
639
787
|
sql_dialect="OGRSQL",
|
|
640
788
|
columns=["iso_a3_renamed", "name"],
|
|
641
789
|
where="iso_a3_renamed IN ('CAN', 'USA', 'MEX')",
|
|
790
|
+
use_arrow=use_arrow,
|
|
642
791
|
)
|
|
643
792
|
assert len(df.columns) == 3
|
|
644
793
|
assert len(df) == 3
|
|
645
794
|
assert df.iso_a3_renamed.tolist() == ["CAN", "USA", "MEX"]
|
|
646
795
|
|
|
647
796
|
|
|
648
|
-
def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext):
|
|
797
|
+
def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext, use_arrow):
|
|
649
798
|
sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
|
|
650
799
|
df = read_dataframe(
|
|
651
800
|
naturalearth_lowres_all_ext,
|
|
@@ -654,13 +803,14 @@ def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext):
|
|
|
654
803
|
columns=["iso_a3_renamed", "name"],
|
|
655
804
|
where="iso_a3_renamed IN ('CRI', 'PAN')",
|
|
656
805
|
bbox=(-85, 8, -80, 10),
|
|
806
|
+
use_arrow=use_arrow,
|
|
657
807
|
)
|
|
658
808
|
assert len(df.columns) == 3
|
|
659
809
|
assert len(df) == 2
|
|
660
810
|
assert df.iso_a3_renamed.tolist() == ["PAN", "CRI"]
|
|
661
811
|
|
|
662
812
|
|
|
663
|
-
def test_read_sql_skip_max(naturalearth_lowres_all_ext):
|
|
813
|
+
def test_read_sql_skip_max(naturalearth_lowres_all_ext, use_arrow):
|
|
664
814
|
sql = """SELECT *
|
|
665
815
|
FROM naturalearth_lowres
|
|
666
816
|
WHERE iso_a3 IN ('CAN', 'MEX', 'USA')
|
|
@@ -671,6 +821,7 @@ def test_read_sql_skip_max(naturalearth_lowres_all_ext):
|
|
|
671
821
|
skip_features=1,
|
|
672
822
|
max_features=1,
|
|
673
823
|
sql_dialect="OGRSQL",
|
|
824
|
+
use_arrow=use_arrow,
|
|
674
825
|
)
|
|
675
826
|
assert len(df.columns) == 6
|
|
676
827
|
assert len(df) == 1
|
|
@@ -678,13 +829,21 @@ def test_read_sql_skip_max(naturalearth_lowres_all_ext):
|
|
|
678
829
|
|
|
679
830
|
sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
|
|
680
831
|
df = read_dataframe(
|
|
681
|
-
naturalearth_lowres_all_ext,
|
|
832
|
+
naturalearth_lowres_all_ext,
|
|
833
|
+
sql=sql,
|
|
834
|
+
max_features=3,
|
|
835
|
+
sql_dialect="OGRSQL",
|
|
836
|
+
use_arrow=use_arrow,
|
|
682
837
|
)
|
|
683
838
|
assert len(df) == 1
|
|
684
839
|
|
|
685
840
|
sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
|
|
686
841
|
df = read_dataframe(
|
|
687
|
-
naturalearth_lowres_all_ext,
|
|
842
|
+
naturalearth_lowres_all_ext,
|
|
843
|
+
sql=sql,
|
|
844
|
+
sql_dialect="OGRSQL",
|
|
845
|
+
skip_features=1,
|
|
846
|
+
use_arrow=use_arrow,
|
|
688
847
|
)
|
|
689
848
|
assert len(df) == 0
|
|
690
849
|
|
|
@@ -695,10 +854,12 @@ def test_read_sql_skip_max(naturalearth_lowres_all_ext):
|
|
|
695
854
|
[ext for ext in ALL_EXTS if ext != ".gpkg"],
|
|
696
855
|
indirect=["naturalearth_lowres"],
|
|
697
856
|
)
|
|
698
|
-
def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres):
|
|
857
|
+
def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres, use_arrow):
|
|
699
858
|
# Should return singular item
|
|
700
859
|
sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
|
|
701
|
-
df = read_dataframe(
|
|
860
|
+
df = read_dataframe(
|
|
861
|
+
naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
|
|
862
|
+
)
|
|
702
863
|
assert len(df) == 1
|
|
703
864
|
assert len(df.columns) == 6
|
|
704
865
|
assert df.iloc[0].iso_a3 == "CAN"
|
|
@@ -708,7 +869,9 @@ def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres):
|
|
|
708
869
|
sql = """SELECT ST_Buffer(geometry, 5) AS geometry, name, pop_est, iso_a3
|
|
709
870
|
FROM naturalearth_lowres
|
|
710
871
|
WHERE ISO_A3 = 'CAN'"""
|
|
711
|
-
df = read_dataframe(
|
|
872
|
+
df = read_dataframe(
|
|
873
|
+
naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
|
|
874
|
+
)
|
|
712
875
|
assert len(df) == 1
|
|
713
876
|
assert len(df.columns) == 4
|
|
714
877
|
assert df.iloc[0].geometry.area > area_canada
|
|
@@ -718,12 +881,14 @@ def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres):
|
|
|
718
881
|
@pytest.mark.parametrize(
|
|
719
882
|
"naturalearth_lowres", [".gpkg"], indirect=["naturalearth_lowres"]
|
|
720
883
|
)
|
|
721
|
-
def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres):
|
|
884
|
+
def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
|
|
722
885
|
# "INDIRECT_SQL" prohibits GDAL from passing the SQL statement to sqlite.
|
|
723
886
|
# Because the statement is processed within GDAL it is possible to use
|
|
724
887
|
# spatialite functions even if sqlite isn't built with spatialite support.
|
|
725
888
|
sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
|
|
726
|
-
df = read_dataframe(
|
|
889
|
+
df = read_dataframe(
|
|
890
|
+
naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
|
|
891
|
+
)
|
|
727
892
|
assert len(df) == 1
|
|
728
893
|
assert len(df.columns) == 6
|
|
729
894
|
assert df.iloc[0].iso_a3 == "CAN"
|
|
@@ -733,29 +898,67 @@ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres):
|
|
|
733
898
|
sql = """SELECT ST_Buffer(geom, 5) AS geometry, name, pop_est, iso_a3
|
|
734
899
|
FROM naturalearth_lowres
|
|
735
900
|
WHERE ISO_A3 = 'CAN'"""
|
|
736
|
-
df = read_dataframe(
|
|
901
|
+
df = read_dataframe(
|
|
902
|
+
naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
|
|
903
|
+
)
|
|
737
904
|
assert len(df) == 1
|
|
738
905
|
assert len(df.columns) == 4
|
|
739
906
|
assert df.iloc[0].geometry.area > area_canada
|
|
740
907
|
|
|
741
908
|
|
|
909
|
+
@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
|
|
910
|
+
def test_write_csv_encoding(tmp_path, encoding):
|
|
911
|
+
"""Test if write_dataframe uses the default encoding correctly."""
|
|
912
|
+
# Write csv test file. Depending on the os this will be written in a different
|
|
913
|
+
# encoding: for linux and macos this is utf-8, for windows it is cp1252.
|
|
914
|
+
csv_path = tmp_path / "test.csv"
|
|
915
|
+
|
|
916
|
+
with open(csv_path, "w", encoding=encoding) as csv:
|
|
917
|
+
csv.write("näme,city\n")
|
|
918
|
+
csv.write("Wilhelm Röntgen,Zürich\n")
|
|
919
|
+
|
|
920
|
+
# Write csv test file with the same data using write_dataframe. It should use the
|
|
921
|
+
# same encoding as above.
|
|
922
|
+
df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
|
|
923
|
+
csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
|
|
924
|
+
write_dataframe(df, csv_pyogrio_path, encoding=encoding)
|
|
925
|
+
|
|
926
|
+
# Check if the text files written both ways can be read again and give same result.
|
|
927
|
+
with open(csv_path, "r", encoding=encoding) as csv:
|
|
928
|
+
csv_str = csv.read()
|
|
929
|
+
with open(csv_pyogrio_path, "r", encoding=encoding) as csv_pyogrio:
|
|
930
|
+
csv_pyogrio_str = csv_pyogrio.read()
|
|
931
|
+
assert csv_str == csv_pyogrio_str
|
|
932
|
+
|
|
933
|
+
# Check if they files are binary identical, to be 100% sure they were written with
|
|
934
|
+
# the same encoding.
|
|
935
|
+
with open(csv_path, "rb") as csv:
|
|
936
|
+
csv_bytes = csv.read()
|
|
937
|
+
with open(csv_pyogrio_path, "rb") as csv_pyogrio:
|
|
938
|
+
csv_pyogrio_bytes = csv_pyogrio.read()
|
|
939
|
+
assert csv_bytes == csv_pyogrio_bytes
|
|
940
|
+
|
|
941
|
+
|
|
742
942
|
@pytest.mark.parametrize("ext", ALL_EXTS)
|
|
743
|
-
|
|
943
|
+
@pytest.mark.requires_arrow_write_api
|
|
944
|
+
def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
|
|
744
945
|
input_gdf = read_dataframe(naturalearth_lowres)
|
|
745
946
|
output_path = tmp_path / f"test{ext}"
|
|
746
947
|
|
|
747
948
|
if ext == ".fgb":
|
|
748
949
|
# For .fgb, spatial_index=False to avoid the rows being reordered
|
|
749
|
-
write_dataframe(
|
|
950
|
+
write_dataframe(
|
|
951
|
+
input_gdf, output_path, use_arrow=use_arrow, spatial_index=False
|
|
952
|
+
)
|
|
750
953
|
else:
|
|
751
|
-
write_dataframe(input_gdf, output_path)
|
|
954
|
+
write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
|
|
752
955
|
|
|
753
956
|
assert output_path.exists()
|
|
754
957
|
result_gdf = read_dataframe(output_path)
|
|
755
958
|
|
|
756
959
|
geometry_types = result_gdf.geometry.type.unique()
|
|
757
960
|
if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
|
|
758
|
-
assert geometry_types == ["MultiPolygon"]
|
|
961
|
+
assert list(geometry_types) == ["MultiPolygon"]
|
|
759
962
|
else:
|
|
760
963
|
assert set(geometry_types) == set(["MultiPolygon", "Polygon"])
|
|
761
964
|
|
|
@@ -776,14 +979,21 @@ def test_write_dataframe(tmp_path, naturalearth_lowres, ext):
|
|
|
776
979
|
|
|
777
980
|
|
|
778
981
|
@pytest.mark.filterwarnings("ignore:.*No SRS set on layer.*")
|
|
982
|
+
@pytest.mark.parametrize("write_geodf", [True, False])
|
|
779
983
|
@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS + [".xlsx"] if ext != ".fgb"])
|
|
780
|
-
|
|
781
|
-
|
|
984
|
+
@pytest.mark.requires_arrow_write_api
|
|
985
|
+
def test_write_dataframe_no_geom(
|
|
986
|
+
request, tmp_path, naturalearth_lowres, write_geodf, ext, use_arrow
|
|
987
|
+
):
|
|
988
|
+
"""Test writing a (geo)dataframe without a geometry column.
|
|
782
989
|
|
|
783
990
|
FlatGeobuf (.fgb) doesn't seem to support this, and just writes an empty file.
|
|
784
991
|
"""
|
|
785
992
|
# Prepare test data
|
|
786
993
|
input_df = read_dataframe(naturalearth_lowres, read_geometry=False)
|
|
994
|
+
if write_geodf:
|
|
995
|
+
input_df = gp.GeoDataFrame(input_df)
|
|
996
|
+
|
|
787
997
|
output_path = tmp_path / f"test{ext}"
|
|
788
998
|
|
|
789
999
|
# A shapefile without geometry column results in only a .dbf file.
|
|
@@ -793,7 +1003,7 @@ def test_write_dataframe_no_geom(tmp_path, naturalearth_lowres, ext):
|
|
|
793
1003
|
# Determine driver
|
|
794
1004
|
driver = DRIVERS[ext] if ext != ".xlsx" else "XLSX"
|
|
795
1005
|
|
|
796
|
-
write_dataframe(input_df, output_path, driver=driver)
|
|
1006
|
+
write_dataframe(input_df, output_path, use_arrow=use_arrow, driver=driver)
|
|
797
1007
|
|
|
798
1008
|
assert output_path.exists()
|
|
799
1009
|
result_df = read_dataframe(output_path)
|
|
@@ -806,6 +1016,9 @@ def test_write_dataframe_no_geom(tmp_path, naturalearth_lowres, ext):
|
|
|
806
1016
|
if ext in [".gpkg", ".shp", ".xlsx"]:
|
|
807
1017
|
# These file types return a DataFrame when read.
|
|
808
1018
|
assert not isinstance(result_df, gp.GeoDataFrame)
|
|
1019
|
+
if isinstance(input_df, gp.GeoDataFrame):
|
|
1020
|
+
input_df = pd.DataFrame(input_df)
|
|
1021
|
+
|
|
809
1022
|
pd.testing.assert_frame_equal(
|
|
810
1023
|
result_df, input_df, check_index_type=False, check_dtype=check_dtype
|
|
811
1024
|
)
|
|
@@ -822,12 +1035,27 @@ def test_write_dataframe_no_geom(tmp_path, naturalearth_lowres, ext):
|
|
|
822
1035
|
)
|
|
823
1036
|
|
|
824
1037
|
|
|
1038
|
+
@pytest.mark.requires_arrow_write_api
|
|
1039
|
+
def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):
|
|
1040
|
+
# dataframe writing ignores the index
|
|
1041
|
+
input_gdf = read_dataframe(naturalearth_lowres)
|
|
1042
|
+
input_gdf = input_gdf.set_index("iso_a3")
|
|
1043
|
+
|
|
1044
|
+
output_path = tmp_path / "test.shp"
|
|
1045
|
+
write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
|
|
1046
|
+
|
|
1047
|
+
result_gdf = read_dataframe(output_path)
|
|
1048
|
+
assert isinstance(result_gdf.index, pd.RangeIndex)
|
|
1049
|
+
assert_geodataframe_equal(result_gdf, input_gdf.reset_index(drop=True))
|
|
1050
|
+
|
|
1051
|
+
|
|
825
1052
|
@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
|
|
826
|
-
|
|
1053
|
+
@pytest.mark.requires_arrow_write_api
|
|
1054
|
+
def test_write_empty_dataframe(tmp_path, ext, use_arrow):
|
|
827
1055
|
expected = gp.GeoDataFrame(geometry=[], crs=4326)
|
|
828
1056
|
|
|
829
1057
|
filename = tmp_path / f"test{ext}"
|
|
830
|
-
write_dataframe(expected, filename)
|
|
1058
|
+
write_dataframe(expected, filename, use_arrow=use_arrow)
|
|
831
1059
|
|
|
832
1060
|
assert filename.exists()
|
|
833
1061
|
df = read_dataframe(filename)
|
|
@@ -835,83 +1063,119 @@ def test_write_empty_dataframe(tmp_path, ext):
|
|
|
835
1063
|
|
|
836
1064
|
|
|
837
1065
|
@pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
|
|
838
|
-
|
|
1066
|
+
@pytest.mark.requires_arrow_write_api
|
|
1067
|
+
def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
|
|
839
1068
|
# Writing empty dataframe to .geojsons or .geojsonl results logically in a 0 byte
|
|
840
1069
|
# file, but gdal isn't able to read those again at the time of writing.
|
|
841
1070
|
# Issue logged here: https://github.com/geopandas/pyogrio/issues/94
|
|
842
1071
|
expected = gp.GeoDataFrame(geometry=[], crs=4326)
|
|
843
1072
|
|
|
844
1073
|
filename = tmp_path / f"test{ext}"
|
|
845
|
-
write_dataframe(expected, filename)
|
|
1074
|
+
write_dataframe(expected, filename, use_arrow=use_arrow)
|
|
846
1075
|
|
|
847
1076
|
assert filename.exists()
|
|
848
1077
|
with pytest.raises(
|
|
849
|
-
Exception, match=".* not recognized as a supported file format."
|
|
1078
|
+
Exception, match=".* not recognized as( being in)? a supported file format."
|
|
850
1079
|
):
|
|
851
|
-
_ = read_dataframe(filename)
|
|
1080
|
+
_ = read_dataframe(filename, use_arrow=use_arrow)
|
|
852
1081
|
|
|
853
1082
|
|
|
854
|
-
|
|
1083
|
+
@pytest.mark.requires_arrow_write_api
|
|
1084
|
+
def test_write_dataframe_gpkg_multiple_layers(tmp_path, naturalearth_lowres, use_arrow):
|
|
855
1085
|
input_gdf = read_dataframe(naturalearth_lowres)
|
|
856
|
-
|
|
1086
|
+
filename = tmp_path / "test.gpkg"
|
|
857
1087
|
|
|
858
|
-
write_dataframe(
|
|
1088
|
+
write_dataframe(
|
|
1089
|
+
input_gdf,
|
|
1090
|
+
filename,
|
|
1091
|
+
layer="first",
|
|
1092
|
+
promote_to_multi=True,
|
|
1093
|
+
use_arrow=use_arrow,
|
|
1094
|
+
)
|
|
859
1095
|
|
|
860
|
-
assert
|
|
861
|
-
assert np.array_equal(list_layers(
|
|
1096
|
+
assert filename.exists()
|
|
1097
|
+
assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])
|
|
862
1098
|
|
|
863
|
-
write_dataframe(
|
|
1099
|
+
write_dataframe(
|
|
1100
|
+
input_gdf,
|
|
1101
|
+
filename,
|
|
1102
|
+
layer="second",
|
|
1103
|
+
promote_to_multi=True,
|
|
1104
|
+
use_arrow=use_arrow,
|
|
1105
|
+
)
|
|
864
1106
|
assert np.array_equal(
|
|
865
|
-
list_layers(
|
|
1107
|
+
list_layers(filename),
|
|
866
1108
|
[["first", "MultiPolygon"], ["second", "MultiPolygon"]],
|
|
867
1109
|
)
|
|
868
1110
|
|
|
869
1111
|
|
|
870
1112
|
@pytest.mark.parametrize("ext", ALL_EXTS)
|
|
871
|
-
|
|
1113
|
+
@pytest.mark.requires_arrow_write_api
|
|
1114
|
+
def test_write_dataframe_append(request, tmp_path, naturalearth_lowres, ext, use_arrow):
|
|
872
1115
|
if ext == ".fgb" and __gdal_version__ <= (3, 5, 0):
|
|
873
1116
|
pytest.skip("Append to FlatGeobuf fails for GDAL <= 3.5.0")
|
|
874
1117
|
|
|
875
1118
|
if ext in (".geojsonl", ".geojsons") and __gdal_version__ <= (3, 6, 0):
|
|
876
1119
|
pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")
|
|
877
1120
|
|
|
1121
|
+
if use_arrow and ext.startswith(".geojson"):
|
|
1122
|
+
# Bug in GDAL when appending int64 to GeoJSON
|
|
1123
|
+
# (https://github.com/OSGeo/gdal/issues/9792)
|
|
1124
|
+
request.node.add_marker(
|
|
1125
|
+
pytest.mark.xfail(reason="Bugs with append when writing Arrow to GeoJSON")
|
|
1126
|
+
)
|
|
1127
|
+
|
|
878
1128
|
input_gdf = read_dataframe(naturalearth_lowres)
|
|
879
|
-
|
|
1129
|
+
filename = tmp_path / f"test{ext}"
|
|
880
1130
|
|
|
881
|
-
write_dataframe(input_gdf,
|
|
1131
|
+
write_dataframe(input_gdf, filename, use_arrow=use_arrow)
|
|
882
1132
|
|
|
883
|
-
|
|
884
|
-
assert len(read_dataframe(
|
|
1133
|
+
filename.exists()
|
|
1134
|
+
assert len(read_dataframe(filename)) == 177
|
|
885
1135
|
|
|
886
|
-
write_dataframe(input_gdf,
|
|
887
|
-
assert len(read_dataframe(
|
|
1136
|
+
write_dataframe(input_gdf, filename, use_arrow=use_arrow, append=True)
|
|
1137
|
+
assert len(read_dataframe(filename)) == 354
|
|
888
1138
|
|
|
889
1139
|
|
|
890
1140
|
@pytest.mark.parametrize("spatial_index", [False, True])
|
|
891
|
-
|
|
1141
|
+
@pytest.mark.requires_arrow_write_api
|
|
1142
|
+
def test_write_dataframe_gdal_options(
|
|
1143
|
+
tmp_path, naturalearth_lowres, spatial_index, use_arrow
|
|
1144
|
+
):
|
|
892
1145
|
df = read_dataframe(naturalearth_lowres)
|
|
893
1146
|
|
|
894
1147
|
outfilename1 = tmp_path / "test1.shp"
|
|
895
|
-
write_dataframe(
|
|
1148
|
+
write_dataframe(
|
|
1149
|
+
df,
|
|
1150
|
+
outfilename1,
|
|
1151
|
+
use_arrow=use_arrow,
|
|
1152
|
+
SPATIAL_INDEX="YES" if spatial_index else "NO",
|
|
1153
|
+
)
|
|
896
1154
|
assert outfilename1.exists() is True
|
|
897
1155
|
index_filename1 = tmp_path / "test1.qix"
|
|
898
1156
|
assert index_filename1.exists() is spatial_index
|
|
899
1157
|
|
|
900
1158
|
# using explicit layer_options instead
|
|
901
1159
|
outfilename2 = tmp_path / "test2.shp"
|
|
902
|
-
write_dataframe(
|
|
1160
|
+
write_dataframe(
|
|
1161
|
+
df,
|
|
1162
|
+
outfilename2,
|
|
1163
|
+
use_arrow=use_arrow,
|
|
1164
|
+
layer_options=dict(spatial_index=spatial_index),
|
|
1165
|
+
)
|
|
903
1166
|
assert outfilename2.exists() is True
|
|
904
1167
|
index_filename2 = tmp_path / "test2.qix"
|
|
905
1168
|
assert index_filename2.exists() is spatial_index
|
|
906
1169
|
|
|
907
1170
|
|
|
908
|
-
|
|
1171
|
+
@pytest.mark.requires_arrow_write_api
|
|
1172
|
+
def test_write_dataframe_gdal_options_unknown(tmp_path, naturalearth_lowres, use_arrow):
|
|
909
1173
|
df = read_dataframe(naturalearth_lowres)
|
|
910
1174
|
|
|
911
1175
|
# geojson has no spatial index, so passing keyword should raise
|
|
912
1176
|
outfilename = tmp_path / "test.geojson"
|
|
913
1177
|
with pytest.raises(ValueError, match="unrecognized option 'SPATIAL_INDEX'"):
|
|
914
|
-
write_dataframe(df, outfilename, spatial_index=True)
|
|
1178
|
+
write_dataframe(df, outfilename, use_arrow=use_arrow, spatial_index=True)
|
|
915
1179
|
|
|
916
1180
|
|
|
917
1181
|
def _get_gpkg_table_names(path):
|
|
@@ -924,21 +1188,25 @@ def _get_gpkg_table_names(path):
|
|
|
924
1188
|
return [res[0] for res in result]
|
|
925
1189
|
|
|
926
1190
|
|
|
927
|
-
|
|
1191
|
+
@pytest.mark.requires_arrow_write_api
|
|
1192
|
+
def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres, use_arrow):
|
|
928
1193
|
df = read_dataframe(naturalearth_lowres)
|
|
929
1194
|
|
|
930
1195
|
test_default_filename = tmp_path / "test_default.gpkg"
|
|
931
|
-
write_dataframe(df, test_default_filename)
|
|
1196
|
+
write_dataframe(df, test_default_filename, use_arrow=use_arrow)
|
|
932
1197
|
assert "gpkg_ogr_contents" in _get_gpkg_table_names(test_default_filename)
|
|
933
1198
|
|
|
934
1199
|
test_no_contents_filename = tmp_path / "test_no_contents.gpkg"
|
|
935
|
-
write_dataframe(
|
|
1200
|
+
write_dataframe(
|
|
1201
|
+
df, test_default_filename, use_arrow=use_arrow, ADD_GPKG_OGR_CONTENTS="NO"
|
|
1202
|
+
)
|
|
936
1203
|
assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename)
|
|
937
1204
|
|
|
938
1205
|
test_no_contents_filename2 = tmp_path / "test_no_contents2.gpkg"
|
|
939
1206
|
write_dataframe(
|
|
940
1207
|
df,
|
|
941
1208
|
test_no_contents_filename2,
|
|
1209
|
+
use_arrow=use_arrow,
|
|
942
1210
|
dataset_options=dict(add_gpkg_ogr_contents=False),
|
|
943
1211
|
)
|
|
944
1212
|
assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename2)
|
|
@@ -955,6 +1223,7 @@ def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres):
|
|
|
955
1223
|
(".geojson", False, ["MultiPolygon", "Polygon"], "Unknown"),
|
|
956
1224
|
],
|
|
957
1225
|
)
|
|
1226
|
+
@pytest.mark.requires_arrow_write_api
|
|
958
1227
|
def test_write_dataframe_promote_to_multi(
|
|
959
1228
|
tmp_path,
|
|
960
1229
|
naturalearth_lowres,
|
|
@@ -962,11 +1231,14 @@ def test_write_dataframe_promote_to_multi(
|
|
|
962
1231
|
promote_to_multi,
|
|
963
1232
|
expected_geometry_types,
|
|
964
1233
|
expected_geometry_type,
|
|
1234
|
+
use_arrow,
|
|
965
1235
|
):
|
|
966
1236
|
input_gdf = read_dataframe(naturalearth_lowres)
|
|
967
1237
|
|
|
968
1238
|
output_path = tmp_path / f"test_promote{ext}"
|
|
969
|
-
write_dataframe(
|
|
1239
|
+
write_dataframe(
|
|
1240
|
+
input_gdf, output_path, use_arrow=use_arrow, promote_to_multi=promote_to_multi
|
|
1241
|
+
)
|
|
970
1242
|
|
|
971
1243
|
assert output_path.exists()
|
|
972
1244
|
output_gdf = read_dataframe(output_path)
|
|
@@ -999,6 +1271,7 @@ def test_write_dataframe_promote_to_multi(
|
|
|
999
1271
|
(".shp", True, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
|
|
1000
1272
|
],
|
|
1001
1273
|
)
|
|
1274
|
+
@pytest.mark.requires_arrow_write_api
|
|
1002
1275
|
def test_write_dataframe_promote_to_multi_layer_geom_type(
|
|
1003
1276
|
tmp_path,
|
|
1004
1277
|
naturalearth_lowres,
|
|
@@ -1007,6 +1280,7 @@ def test_write_dataframe_promote_to_multi_layer_geom_type(
|
|
|
1007
1280
|
geometry_type,
|
|
1008
1281
|
expected_geometry_types,
|
|
1009
1282
|
expected_geometry_type,
|
|
1283
|
+
use_arrow,
|
|
1010
1284
|
):
|
|
1011
1285
|
input_gdf = read_dataframe(naturalearth_lowres)
|
|
1012
1286
|
|
|
@@ -1023,6 +1297,7 @@ def test_write_dataframe_promote_to_multi_layer_geom_type(
|
|
|
1023
1297
|
write_dataframe(
|
|
1024
1298
|
input_gdf,
|
|
1025
1299
|
output_path,
|
|
1300
|
+
use_arrow=use_arrow,
|
|
1026
1301
|
promote_to_multi=promote_to_multi,
|
|
1027
1302
|
geometry_type=geometry_type,
|
|
1028
1303
|
)
|
|
@@ -1041,9 +1316,15 @@ def test_write_dataframe_promote_to_multi_layer_geom_type(
|
|
|
1041
1316
|
(".fgb", False, "Polygon", "Mismatched geometry type"),
|
|
1042
1317
|
(".fgb", None, "Point", "Mismatched geometry type"),
|
|
1043
1318
|
(".fgb", None, "Polygon", "Mismatched geometry type"),
|
|
1044
|
-
(
|
|
1319
|
+
(
|
|
1320
|
+
".shp",
|
|
1321
|
+
None,
|
|
1322
|
+
"Point",
|
|
1323
|
+
"Could not add feature to layer at index|Error while writing batch to OGR layer",
|
|
1324
|
+
),
|
|
1045
1325
|
],
|
|
1046
1326
|
)
|
|
1327
|
+
@pytest.mark.requires_arrow_write_api
|
|
1047
1328
|
def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
|
|
1048
1329
|
tmp_path,
|
|
1049
1330
|
naturalearth_lowres,
|
|
@@ -1051,31 +1332,37 @@ def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
|
|
|
1051
1332
|
promote_to_multi,
|
|
1052
1333
|
geometry_type,
|
|
1053
1334
|
expected_raises_match,
|
|
1335
|
+
use_arrow,
|
|
1054
1336
|
):
|
|
1055
1337
|
input_gdf = read_dataframe(naturalearth_lowres)
|
|
1056
1338
|
|
|
1057
1339
|
output_path = tmp_path / f"test{ext}"
|
|
1058
|
-
with pytest.raises(FeatureError, match=expected_raises_match):
|
|
1340
|
+
with pytest.raises((FeatureError, DataLayerError), match=expected_raises_match):
|
|
1059
1341
|
write_dataframe(
|
|
1060
1342
|
input_gdf,
|
|
1061
1343
|
output_path,
|
|
1344
|
+
use_arrow=use_arrow,
|
|
1062
1345
|
promote_to_multi=promote_to_multi,
|
|
1063
1346
|
geometry_type=geometry_type,
|
|
1064
1347
|
)
|
|
1065
1348
|
|
|
1066
1349
|
|
|
1067
|
-
|
|
1350
|
+
@pytest.mark.requires_arrow_write_api
|
|
1351
|
+
def test_write_dataframe_layer_geom_type_invalid(
|
|
1352
|
+
tmp_path, naturalearth_lowres, use_arrow
|
|
1353
|
+
):
|
|
1068
1354
|
df = read_dataframe(naturalearth_lowres)
|
|
1069
1355
|
|
|
1070
1356
|
filename = tmp_path / "test.geojson"
|
|
1071
1357
|
with pytest.raises(
|
|
1072
1358
|
GeometryError, match="Geometry type is not supported: NotSupported"
|
|
1073
1359
|
):
|
|
1074
|
-
write_dataframe(df, filename, geometry_type="NotSupported")
|
|
1360
|
+
write_dataframe(df, filename, use_arrow=use_arrow, geometry_type="NotSupported")
|
|
1075
1361
|
|
|
1076
1362
|
|
|
1077
1363
|
@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".shp"])
|
|
1078
|
-
|
|
1364
|
+
@pytest.mark.requires_arrow_write_api
|
|
1365
|
+
def test_write_dataframe_truly_mixed(tmp_path, ext, use_arrow):
|
|
1079
1366
|
geometry = [
|
|
1080
1367
|
shapely.Point(0, 0),
|
|
1081
1368
|
shapely.LineString([(0, 0), (1, 1)]),
|
|
@@ -1095,9 +1382,9 @@ def test_write_dataframe_truly_mixed(tmp_path, ext):
|
|
|
1095
1382
|
|
|
1096
1383
|
if ext == ".fgb":
|
|
1097
1384
|
# For .fgb, spatial_index=False to avoid the rows being reordered
|
|
1098
|
-
write_dataframe(df, filename, spatial_index=False)
|
|
1385
|
+
write_dataframe(df, filename, use_arrow=use_arrow, spatial_index=False)
|
|
1099
1386
|
else:
|
|
1100
|
-
write_dataframe(df, filename)
|
|
1387
|
+
write_dataframe(df, filename, use_arrow=use_arrow)
|
|
1101
1388
|
|
|
1102
1389
|
# Drivers that support mixed geometries will default to "Unknown" geometry type
|
|
1103
1390
|
assert read_info(filename)["geometry_type"] == "Unknown"
|
|
@@ -1105,7 +1392,8 @@ def test_write_dataframe_truly_mixed(tmp_path, ext):
|
|
|
1105
1392
|
assert_geodataframe_equal(result, df, check_geom_type=True)
|
|
1106
1393
|
|
|
1107
1394
|
|
|
1108
|
-
|
|
1395
|
+
@pytest.mark.requires_arrow_write_api
|
|
1396
|
+
def test_write_dataframe_truly_mixed_invalid(tmp_path, use_arrow):
|
|
1109
1397
|
# Shapefile doesn't support generic "Geometry" / "Unknown" type
|
|
1110
1398
|
# for mixed geometries
|
|
1111
1399
|
|
|
@@ -1123,9 +1411,12 @@ def test_write_dataframe_truly_mixed_invalid(tmp_path):
|
|
|
1123
1411
|
msg = (
|
|
1124
1412
|
"Could not add feature to layer at index 1: Attempt to "
|
|
1125
1413
|
r"write non-point \(LINESTRING\) geometry to point shapefile."
|
|
1414
|
+
# DataLayerError when using Arrow
|
|
1415
|
+
"|Error while writing batch to OGR layer: Attempt to "
|
|
1416
|
+
r"write non-point \(LINESTRING\) geometry to point shapefile."
|
|
1126
1417
|
)
|
|
1127
|
-
with pytest.raises(FeatureError, match=msg):
|
|
1128
|
-
write_dataframe(df, tmp_path / "test.shp")
|
|
1418
|
+
with pytest.raises((FeatureError, DataLayerError), match=msg):
|
|
1419
|
+
write_dataframe(df, tmp_path / "test.shp", use_arrow=use_arrow)
|
|
1129
1420
|
|
|
1130
1421
|
|
|
1131
1422
|
@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".fgb"])
|
|
@@ -1138,11 +1429,12 @@ def test_write_dataframe_truly_mixed_invalid(tmp_path):
|
|
|
1138
1429
|
[None, None],
|
|
1139
1430
|
],
|
|
1140
1431
|
)
|
|
1141
|
-
|
|
1432
|
+
@pytest.mark.requires_arrow_write_api
|
|
1433
|
+
def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext, use_arrow):
|
|
1142
1434
|
filename = tmp_path / f"test{ext}"
|
|
1143
1435
|
|
|
1144
1436
|
df = gp.GeoDataFrame({"col": [1.0, 2.0]}, geometry=geoms, crs="EPSG:4326")
|
|
1145
|
-
write_dataframe(df, filename)
|
|
1437
|
+
write_dataframe(df, filename, use_arrow=use_arrow)
|
|
1146
1438
|
result = read_dataframe(filename)
|
|
1147
1439
|
assert_geodataframe_equal(result, df)
|
|
1148
1440
|
|
|
@@ -1150,16 +1442,19 @@ def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext):
|
|
|
1150
1442
|
@pytest.mark.filterwarnings(
|
|
1151
1443
|
"ignore: You will likely lose important projection information"
|
|
1152
1444
|
)
|
|
1153
|
-
|
|
1445
|
+
@pytest.mark.requires_arrow_write_api
|
|
1446
|
+
def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
|
|
1154
1447
|
df = read_dataframe(naturalearth_lowres_all_ext)
|
|
1155
1448
|
# project Belgium to a custom Albers Equal Area projection
|
|
1156
|
-
expected =
|
|
1157
|
-
|
|
1449
|
+
expected = (
|
|
1450
|
+
df.loc[df.name == "Belgium"]
|
|
1451
|
+
.reset_index(drop=True)
|
|
1452
|
+
.to_crs("+proj=aea +lat_1=49.5 +lat_2=51.5 +lon_0=4.3")
|
|
1158
1453
|
)
|
|
1159
|
-
filename =
|
|
1160
|
-
write_dataframe(expected, filename)
|
|
1454
|
+
filename = tmp_path / "test.shp"
|
|
1455
|
+
write_dataframe(expected, filename, use_arrow=use_arrow)
|
|
1161
1456
|
|
|
1162
|
-
assert
|
|
1457
|
+
assert filename.exists()
|
|
1163
1458
|
|
|
1164
1459
|
df = read_dataframe(filename)
|
|
1165
1460
|
|
|
@@ -1171,6 +1466,7 @@ def test_custom_crs_io(tmpdir, naturalearth_lowres_all_ext):
|
|
|
1171
1466
|
|
|
1172
1467
|
|
|
1173
1468
|
def test_write_read_mixed_column_values(tmp_path):
|
|
1469
|
+
# use_arrow=True is tested separately below
|
|
1174
1470
|
mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
|
|
1175
1471
|
geoms = [shapely.Point(0, 0) for _ in mixed_values]
|
|
1176
1472
|
test_gdf = gp.GeoDataFrame(
|
|
@@ -1187,7 +1483,21 @@ def test_write_read_mixed_column_values(tmp_path):
|
|
|
1187
1483
|
assert output_gdf["mixed"][idx] == str(value)
|
|
1188
1484
|
|
|
1189
1485
|
|
|
1190
|
-
|
|
1486
|
+
@requires_arrow_write_api
|
|
1487
|
+
def test_write_read_mixed_column_values_arrow(tmp_path):
|
|
1488
|
+
# Arrow cannot represent a column of mixed types
|
|
1489
|
+
mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
|
|
1490
|
+
geoms = [shapely.Point(0, 0) for _ in mixed_values]
|
|
1491
|
+
test_gdf = gp.GeoDataFrame(
|
|
1492
|
+
{"geometry": geoms, "mixed": mixed_values}, crs="epsg:31370"
|
|
1493
|
+
)
|
|
1494
|
+
output_path = tmp_path / "test_write_mixed_column.gpkg"
|
|
1495
|
+
with pytest.raises(TypeError, match=".*Conversion failed for column"):
|
|
1496
|
+
write_dataframe(test_gdf, output_path, use_arrow=True)
|
|
1497
|
+
|
|
1498
|
+
|
|
1499
|
+
@pytest.mark.requires_arrow_write_api
|
|
1500
|
+
def test_write_read_null(tmp_path, use_arrow):
|
|
1191
1501
|
output_path = tmp_path / "test_write_nan.gpkg"
|
|
1192
1502
|
geom = shapely.Point(0, 0)
|
|
1193
1503
|
test_data = {
|
|
@@ -1196,7 +1506,7 @@ def test_write_read_null(tmp_path):
|
|
|
1196
1506
|
"object_str": ["test", None, np.nan],
|
|
1197
1507
|
}
|
|
1198
1508
|
test_gdf = gp.GeoDataFrame(test_data, crs="epsg:31370")
|
|
1199
|
-
write_dataframe(test_gdf, output_path)
|
|
1509
|
+
write_dataframe(test_gdf, output_path, use_arrow=use_arrow)
|
|
1200
1510
|
result_gdf = read_dataframe(output_path)
|
|
1201
1511
|
assert len(test_gdf) == len(result_gdf)
|
|
1202
1512
|
assert result_gdf["float64"][0] == 1.0
|
|
@@ -1219,7 +1529,7 @@ def test_write_read_null(tmp_path):
|
|
|
1219
1529
|
["2.5D MultiLineString", "MultiLineString Z"],
|
|
1220
1530
|
),
|
|
1221
1531
|
(
|
|
1222
|
-
"MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))",
|
|
1532
|
+
"MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))",
|
|
1223
1533
|
["2.5D MultiPolygon", "MultiPolygon Z"],
|
|
1224
1534
|
),
|
|
1225
1535
|
(
|
|
@@ -1228,11 +1538,12 @@ def test_write_read_null(tmp_path):
|
|
|
1228
1538
|
),
|
|
1229
1539
|
],
|
|
1230
1540
|
)
|
|
1231
|
-
|
|
1541
|
+
@pytest.mark.requires_arrow_write_api
|
|
1542
|
+
def test_write_geometry_z_types(tmp_path, wkt, geom_types, use_arrow):
|
|
1232
1543
|
filename = tmp_path / "test.fgb"
|
|
1233
1544
|
gdf = gp.GeoDataFrame(geometry=from_wkt([wkt]), crs="EPSG:4326")
|
|
1234
1545
|
for geom_type in geom_types:
|
|
1235
|
-
write_dataframe(gdf, filename, geometry_type=geom_type)
|
|
1546
|
+
write_dataframe(gdf, filename, use_arrow=use_arrow, geometry_type=geom_type)
|
|
1236
1547
|
df = read_dataframe(filename)
|
|
1237
1548
|
assert_geodataframe_equal(df, gdf)
|
|
1238
1549
|
|
|
@@ -1261,7 +1572,7 @@ def test_write_geometry_z_types(tmp_path, wkt, geom_types):
|
|
|
1261
1572
|
"MultiPolygon Z",
|
|
1262
1573
|
False,
|
|
1263
1574
|
[
|
|
1264
|
-
"MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))"
|
|
1575
|
+
"MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))"
|
|
1265
1576
|
],
|
|
1266
1577
|
),
|
|
1267
1578
|
(
|
|
@@ -1286,8 +1597,9 @@ def test_write_geometry_z_types(tmp_path, wkt, geom_types):
|
|
|
1286
1597
|
),
|
|
1287
1598
|
],
|
|
1288
1599
|
)
|
|
1600
|
+
@pytest.mark.requires_arrow_write_api
|
|
1289
1601
|
def test_write_geometry_z_types_auto(
|
|
1290
|
-
tmp_path, ext, test_descr, exp_geometry_type, mixed_dimensions, wkt
|
|
1602
|
+
tmp_path, ext, test_descr, exp_geometry_type, mixed_dimensions, wkt, use_arrow
|
|
1291
1603
|
):
|
|
1292
1604
|
# Shapefile has some different behaviour that other file types
|
|
1293
1605
|
if ext == ".shp":
|
|
@@ -1314,10 +1626,10 @@ def test_write_geometry_z_types_auto(
|
|
|
1314
1626
|
DataSourceError,
|
|
1315
1627
|
match=("Mixed 2D and 3D coordinates are not supported by"),
|
|
1316
1628
|
):
|
|
1317
|
-
write_dataframe(gdf, filename)
|
|
1629
|
+
write_dataframe(gdf, filename, use_arrow=use_arrow)
|
|
1318
1630
|
return
|
|
1319
1631
|
else:
|
|
1320
|
-
write_dataframe(gdf, filename)
|
|
1632
|
+
write_dataframe(gdf, filename, use_arrow=use_arrow)
|
|
1321
1633
|
|
|
1322
1634
|
info = read_info(filename)
|
|
1323
1635
|
assert info["geometry_type"] == exp_geometry_type
|
|
@@ -1329,11 +1641,48 @@ def test_write_geometry_z_types_auto(
     assert_geodataframe_equal(gdf, result_gdf)


-def test_read_multisurface(data_dir):
-    df = read_dataframe(data_dir / "test_multisurface.gpkg")
+@pytest.mark.parametrize(
+    "on_invalid, message",
+    [
+        (
+            "warn",
+            "Invalid WKB: geometry is returned as None. IllegalArgumentException: "
+            "Invalid number of points in LinearRing found 2 - must be 0 or >=",
+        ),
+        ("raise", "Invalid number of points in LinearRing found 2 - must be 0 or >="),
+        ("ignore", None),
+    ],
+)
+def test_read_invalid_shp(data_dir, use_arrow, on_invalid, message):
+    if on_invalid == "raise":
+        handler = pytest.raises(shapely.errors.GEOSException, match=message)
+    elif on_invalid == "warn":
+        handler = pytest.warns(match=message)
+    elif on_invalid == "ignore":
+        handler = contextlib.nullcontext()
+    else:
+        raise ValueError(f"unknown value for on_invalid: {on_invalid}")
+
+    with handler:
+        df = read_dataframe(
+            data_dir / "poly_not_enough_points.shp.zip",
+            use_arrow=use_arrow,
+            on_invalid=on_invalid,
+        )
+        df.geometry.isnull().all()
+
+
+def test_read_multisurface(data_dir, use_arrow):
+    if use_arrow:
+        with pytest.raises(shapely.errors.GEOSException):
+            # TODO(Arrow)
+            # shapely fails parsing the WKB
+            read_dataframe(data_dir / "test_multisurface.gpkg", use_arrow=True)
+    else:
+        df = read_dataframe(data_dir / "test_multisurface.gpkg")

-    # MultiSurface should be converted to MultiPolygon
-    assert df.geometry.type.tolist() == ["MultiPolygon"]
+        # MultiSurface should be converted to MultiPolygon
+        assert df.geometry.type.tolist() == ["MultiPolygon"]


 def test_read_dataset_kwargs(data_dir, use_arrow):
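The new `test_read_invalid_shp` covers the `on_invalid` keyword ("warn", "raise", or "ignore"), which controls what happens when GDAL hands back WKB that shapely cannot parse. A sketch using the fixture added in this release:

    from pyogrio import read_dataframe

    # invalid rings come back as None rather than raising
    df = read_dataframe(
        "poly_not_enough_points.shp.zip",
        on_invalid="ignore",
    )
    assert df.geometry.isnull().all()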
@@ -1372,7 +1721,8 @@ def test_read_invalid_dataset_kwargs(naturalearth_lowres, use_arrow):
         read_dataframe(naturalearth_lowres, use_arrow=use_arrow, INVALID="YES")


-def test_write_nullable_dtypes(tmp_path):
+@pytest.mark.requires_arrow_write_api
+def test_write_nullable_dtypes(tmp_path, use_arrow):
     path = tmp_path / "test_nullable_dtypes.gpkg"
     test_data = {
         "col1": pd.Series([1, 2, 3], dtype="int64"),
@@ -1384,7 +1734,7 @@ def test_write_nullable_dtypes(tmp_path):
     input_gdf = gp.GeoDataFrame(
         test_data, geometry=[shapely.Point(0, 0)] * 3, crs="epsg:31370"
     )
-    write_dataframe(input_gdf, path)
+    write_dataframe(input_gdf, path, use_arrow=use_arrow)
     output_gdf = read_dataframe(path)
     # We read it back as default (non-nullable) numpy dtypes, so we cast
     # to those for the expected result
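These hunks route the nullable-dtype roundtrip through `use_arrow` as well. A sketch of the behavior under test, assuming a GeoPackage target (nullable pandas extension dtypes are written, but read back as plain numpy dtypes, so a nulled integer column is expected to return as float):

    import geopandas as gp
    import pandas as pd
    import shapely
    from pyogrio import read_dataframe, write_dataframe

    gdf = gp.GeoDataFrame(
        {"col": pd.array([1, pd.NA, 3], dtype="Int64")},
        geometry=[shapely.Point(0, 0)] * 3,
        crs="EPSG:31370",
    )
    write_dataframe(gdf, "test_nullable.gpkg")
    out = read_dataframe("test_nullable.gpkg")  # "col" as float64, null as NaN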
@@ -1393,19 +1743,21 @@ def test_write_nullable_dtypes(tmp_path):
     expected["col3"] = expected["col3"].astype("float32")
     expected["col4"] = expected["col4"].astype("float64")
     expected["col5"] = expected["col5"].astype(object)
+    expected.loc[1, "col5"] = None  # pandas converts to pd.NA on line above
     assert_geodataframe_equal(output_gdf, expected)


 @pytest.mark.parametrize(
     "metadata_type", ["dataset_metadata", "layer_metadata", "metadata"]
 )
-def test_metadata_io(tmpdir, naturalearth_lowres, metadata_type):
+@pytest.mark.requires_arrow_write_api
+def test_metadata_io(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
     metadata = {"level": metadata_type}

     df = read_dataframe(naturalearth_lowres)

-    filename = os.path.join(str(tmpdir), "test.gpkg")
-    write_dataframe(df, filename, **{metadata_type: metadata})
+    filename = tmp_path / "test.gpkg"
+    write_dataframe(df, filename, use_arrow=use_arrow, **{metadata_type: metadata})

     metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type

@@ -1421,22 +1773,27 @@ def test_metadata_io(tmpdir, naturalearth_lowres, metadata_type):
         {"key": 1},
     ],
 )
-def test_invalid_metadata(tmpdir, naturalearth_lowres, metadata_type, metadata):
+@pytest.mark.requires_arrow_write_api
+def test_invalid_metadata(
+    tmp_path, naturalearth_lowres, metadata_type, metadata, use_arrow
+):
+    df = read_dataframe(naturalearth_lowres)
     with pytest.raises(ValueError, match="must be a string"):
-        filename = os.path.join(str(tmpdir), "test.gpkg")
         write_dataframe(
-            read_dataframe(naturalearth_lowres), filename, **{metadata_type: metadata}
+            df, tmp_path / "test.gpkg", use_arrow=use_arrow, **{metadata_type: metadata}
         )


 @pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
-def test_metadata_unsupported(tmpdir, naturalearth_lowres, metadata_type):
+@pytest.mark.requires_arrow_write_api
+def test_metadata_unsupported(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
     """metadata is silently ignored"""

-    filename = os.path.join(str(tmpdir), "test.geojson")
+    filename = tmp_path / "test.geojson"
     write_dataframe(
         read_dataframe(naturalearth_lowres),
         filename,
+        use_arrow=use_arrow,
         **{metadata_type: {"key": "value"}},
     )

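The metadata tests above now run under both write paths. A sketch of the API (illustrative data; per these tests, metadata keys and values must be strings, `metadata` behaves as an alias for `layer_metadata`, and drivers without metadata support silently ignore it):

    import geopandas as gp
    import shapely
    from pyogrio import read_info, write_dataframe

    df = gp.GeoDataFrame(geometry=[shapely.Point(0, 0)], crs="EPSG:4326")
    write_dataframe(df, "test.gpkg", layer_metadata={"level": "layer"})
    info = read_info("test.gpkg")  # reports the stored layer metadata back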
@@ -1466,3 +1823,352 @@ def test_read_dataframe_arrow_dtypes(tmp_path):
     assert isinstance(result["col"].dtype, pd.ArrowDtype)
     result["col"] = result["col"].astype("float64")
     assert_geodataframe_equal(result, df)
+
+
+@requires_pyarrow_api
+@pytest.mark.skipif(
+    __gdal_version__ < (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
+)
+@pytest.mark.parametrize("ext", ALL_EXTS)
+def test_arrow_bool_roundtrip(tmp_path, ext):
+    filename = tmp_path / f"test{ext}"
+
+    kwargs = {}
+
+    if ext == ".fgb":
+        # For .fgb, spatial_index=False to avoid the rows being reordered
+        kwargs["spatial_index"] = False
+
+    df = gp.GeoDataFrame(
+        {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
+        crs="EPSG:4326",
+    )
+
+    write_dataframe(df, filename, **kwargs)
+    result = read_dataframe(filename, use_arrow=True)
+    # Shapefiles do not support bool columns; these are returned as int32
+    assert_geodataframe_equal(result, df, check_dtype=ext != ".shp")
+
+
+@requires_pyarrow_api
+@pytest.mark.skipif(
+    __gdal_version__ >= (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
+)
+@pytest.mark.parametrize("ext", ALL_EXTS)
+def test_arrow_bool_exception(tmp_path, ext):
+    filename = tmp_path / f"test{ext}"
+
+    df = gp.GeoDataFrame(
+        {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
+        crs="EPSG:4326",
+    )
+
+    write_dataframe(df, filename)
+
+    if ext in {".fgb", ".gpkg"}:
+        # only raise exception for GPKG / FGB
+        with pytest.raises(
+            RuntimeError,
+            match="GDAL < 3.8.3 does not correctly read boolean data values using "
+            "the Arrow API",
+        ):
+            read_dataframe(filename, use_arrow=True)
+
+        # do not raise exception if no bool columns are read
+        read_dataframe(filename, use_arrow=True, columns=[])
+
+    else:
+        _ = read_dataframe(filename, use_arrow=True)
+
+
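The two tests above pin down a GDAL quirk: before 3.8.3, boolean columns read through the Arrow stream come back wrong, so pyogrio raises for GPKG/FGB instead of returning bad data. The escape hatch the test demonstrates is excluding the bool columns:

    from pyogrio import read_dataframe

    # on GDAL < 3.8.3, skip the bool columns to keep using the Arrow path;
    # columns=[] reads only the geometry
    df = read_dataframe("test.gpkg", use_arrow=True, columns=[])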
+@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
+@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
+def test_write_memory(naturalearth_lowres, driver):
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO()
+    write_dataframe(df, buffer, driver=driver, layer="test")
+
+    assert len(buffer.getbuffer()) > 0
+
+    actual = read_dataframe(buffer)
+    assert len(actual) == len(df)
+
+    is_json = driver == "GeoJSON"
+
+    assert_geodataframe_equal(
+        actual,
+        df,
+        check_less_precise=is_json,
+        check_index_type=False,
+        check_dtype=not is_json,
+    )
+
+
+def test_write_memory_driver_required(naturalearth_lowres):
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO()
+
+    with pytest.raises(
+        ValueError,
+        match="driver must be provided to write to in-memory file",
+    ):
+        write_dataframe(df.head(1), buffer, driver=None, layer="test")
+
+
+@pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
+def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
+    if driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
+        pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")
+
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO()
+
+    with pytest.raises(
+        ValueError, match=f"writing to in-memory file is not supported for {driver}"
+    ):
+        write_dataframe(df, buffer, driver=driver, layer="test")
+
+
+@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
+def test_write_memory_append_unsupported(naturalearth_lowres, driver):
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO()
+
+    with pytest.raises(
+        NotImplementedError, match="append is not supported for in-memory files"
+    ):
+        write_dataframe(df.head(1), buffer, driver=driver, layer="test", append=True)
+
+
+def test_write_memory_existing_unsupported(naturalearth_lowres):
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO(b"0000")
+    with pytest.raises(
+        NotImplementedError,
+        match="writing to existing in-memory object is not supported",
+    ):
+        write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test")
+
+
|
+
@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
|
|
1959
|
+
def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
|
|
1960
|
+
"""Verify that we write non-UTF data to the data source
|
|
1961
|
+
|
|
1962
|
+
IMPORTANT: this may not be valid for the data source and will likely render
|
|
1963
|
+
them unusable in other tools, but should successfully roundtrip unless we
|
|
1964
|
+
disable writing using other encodings.
|
|
1965
|
+
|
|
1966
|
+
NOTE: FlatGeobuff driver cannot handle non-UTF data in GDAL >= 3.9
|
|
1967
|
+
|
|
1968
|
+
NOTE: pyarrow cannot handle non-UTF-8 characters in this way
|
|
1969
|
+
"""
|
|
1970
|
+
|
|
1971
|
+
encoding, text = encoded_text
|
|
1972
|
+
output_path = tmp_path / f"test.{ext}"
|
|
1973
|
+
|
|
1974
|
+
df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
|
|
1975
|
+
write_dataframe(df, output_path, encoding=encoding)
|
|
1976
|
+
|
|
1977
|
+
# cannot open these files without specifying encoding
|
|
1978
|
+
with pytest.raises(UnicodeDecodeError):
|
|
1979
|
+
read_dataframe(output_path)
|
|
1980
|
+
|
|
1981
|
+
# must provide encoding to read these properly
|
|
1982
|
+
actual = read_dataframe(output_path, encoding=encoding)
|
|
1983
|
+
assert actual.columns[0] == text
|
|
1984
|
+
assert actual[text].values[0] == text
|
|
1985
|
+
|
|
1986
|
+
|
|
1987
|
+
@requires_pyarrow_api
|
|
1988
|
+
@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
|
|
1989
|
+
def test_non_utf8_encoding_io_arrow_exception(tmp_path, ext, encoded_text):
|
|
1990
|
+
encoding, text = encoded_text
|
|
1991
|
+
output_path = tmp_path / f"test.{ext}"
|
|
1992
|
+
|
|
1993
|
+
df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
|
|
1994
|
+
write_dataframe(df, output_path, encoding=encoding)
|
|
1995
|
+
|
|
1996
|
+
# cannot open these files without specifying encoding
|
|
1997
|
+
with pytest.raises(UnicodeDecodeError):
|
|
1998
|
+
read_dataframe(output_path)
|
|
1999
|
+
|
|
2000
|
+
with pytest.raises(
|
|
2001
|
+
ValueError, match="non-UTF-8 encoding is not supported for Arrow"
|
|
2002
|
+
):
|
|
2003
|
+
read_dataframe(output_path, encoding=encoding, use_arrow=True)
|
|
2004
|
+
|
|
2005
|
+
|
|
2006
|
+
def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):
|
|
2007
|
+
encoding, text = encoded_text
|
|
2008
|
+
|
|
2009
|
+
output_path = tmp_path / "test.shp"
|
|
2010
|
+
|
|
2011
|
+
df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
|
|
2012
|
+
write_dataframe(df, output_path, encoding=encoding)
|
|
2013
|
+
|
|
2014
|
+
# NOTE: GDAL automatically creates a cpg file with the encoding name, which
|
|
2015
|
+
# means that if we read this without specifying the encoding it uses the
|
|
2016
|
+
# correct one
|
|
2017
|
+
actual = read_dataframe(output_path, use_arrow=use_arrow)
|
|
2018
|
+
assert actual.columns[0] == text
|
|
2019
|
+
assert actual[text].values[0] == text
|
|
2020
|
+
|
|
2021
|
+
# verify that if cpg file is not present, that user-provided encoding must be used
|
|
2022
|
+
output_path.with_suffix(".cpg").unlink()
|
|
2023
|
+
|
|
2024
|
+
# We will assume ISO-8859-1, which is wrong
|
|
2025
|
+
miscoded = text.encode(encoding).decode("ISO-8859-1")
|
|
2026
|
+
|
|
2027
|
+
if use_arrow:
|
|
2028
|
+
# pyarrow cannot decode column name with incorrect encoding
|
|
2029
|
+
with pytest.raises(UnicodeDecodeError):
|
|
2030
|
+
read_dataframe(output_path, use_arrow=True)
|
|
2031
|
+
else:
|
|
2032
|
+
bad = read_dataframe(output_path, use_arrow=False)
|
|
2033
|
+
assert bad.columns[0] == miscoded
|
|
2034
|
+
assert bad[miscoded].values[0] == miscoded
|
|
2035
|
+
|
|
2036
|
+
# If encoding is provided, that should yield correct text
|
|
2037
|
+
actual = read_dataframe(output_path, encoding=encoding, use_arrow=use_arrow)
|
|
2038
|
+
assert actual.columns[0] == text
|
|
2039
|
+
assert actual[text].values[0] == text
|
|
2040
|
+
|
|
2041
|
+
# if ENCODING open option, that should yield correct text
|
|
2042
|
+
actual = read_dataframe(output_path, use_arrow=use_arrow, ENCODING=encoding)
|
|
2043
|
+
assert actual.columns[0] == text
|
|
2044
|
+
assert actual[text].values[0] == text
|
|
2045
|
+
|
|
2046
|
+
|
|
2047
|
+
def test_encoding_read_option_collision_shapefile(naturalearth_lowres, use_arrow):
|
|
2048
|
+
"""Providing both encoding parameter and ENCODING open option (even if blank) is not allowed"""
|
|
2049
|
+
|
|
2050
|
+
with pytest.raises(
|
|
2051
|
+
ValueError, match='cannot provide both encoding parameter and "ENCODING" option'
|
|
2052
|
+
):
|
|
2053
|
+
read_dataframe(
|
|
2054
|
+
naturalearth_lowres, encoding="CP936", ENCODING="", use_arrow=use_arrow
|
|
2055
|
+
)
|
|
2056
|
+
|
|
2057
|
+
|
|
2058
|
+
def test_encoding_write_layer_option_collision_shapefile(tmp_path, encoded_text):
|
|
2059
|
+
"""Providing both encoding parameter and ENCODING layer creation option (even if blank) is not allowed"""
|
|
2060
|
+
encoding, text = encoded_text
|
|
2061
|
+
|
|
2062
|
+
output_path = tmp_path / "test.shp"
|
|
2063
|
+
df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
|
|
2064
|
+
|
|
2065
|
+
with pytest.raises(
|
|
2066
|
+
ValueError,
|
|
2067
|
+
match='cannot provide both encoding parameter and "ENCODING" layer creation option',
|
|
2068
|
+
):
|
|
2069
|
+
write_dataframe(
|
|
2070
|
+
df, output_path, encoding=encoding, layer_options={"ENCODING": ""}
|
|
2071
|
+
)
|
|
2072
|
+
|
|
2073
|
+
|
|
2074
|
+
def test_non_utf8_encoding_shapefile_sql(tmp_path, use_arrow):
|
|
2075
|
+
encoding = "CP936"
|
|
2076
|
+
|
|
2077
|
+
output_path = tmp_path / "test.shp"
|
|
2078
|
+
|
|
2079
|
+
mandarin = "中文"
|
|
2080
|
+
df = gp.GeoDataFrame(
|
|
2081
|
+
{mandarin: mandarin, "geometry": [Point(0, 0)]}, crs="EPSG:4326"
|
|
2082
|
+
)
|
|
2083
|
+
write_dataframe(df, output_path, encoding=encoding)
|
|
2084
|
+
|
|
2085
|
+
actual = read_dataframe(
|
|
2086
|
+
output_path,
|
|
2087
|
+
sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
|
|
2088
|
+
use_arrow=use_arrow,
|
|
2089
|
+
)
|
|
2090
|
+
assert actual.columns[0] == mandarin
|
|
2091
|
+
assert actual[mandarin].values[0] == mandarin
|
|
2092
|
+
|
|
2093
|
+
actual = read_dataframe(
|
|
2094
|
+
output_path,
|
|
2095
|
+
sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
|
|
2096
|
+
encoding=encoding,
|
|
2097
|
+
use_arrow=use_arrow,
|
|
2098
|
+
)
|
|
2099
|
+
assert actual.columns[0] == mandarin
|
|
2100
|
+
assert actual[mandarin].values[0] == mandarin
|
|
2101
|
+
|
|
2102
|
+
|
|
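The encoding tests above establish that a non-UTF-8 `encoding` roundtrips (with caveats), is rejected on the Arrow read path, and cannot be combined with the `ENCODING` open/creation option. For shapefiles the write also emits a `.cpg` sidecar, so plain reads are self-describing; a sketch with illustrative data:

    import geopandas as gp
    import shapely
    from pyogrio import read_dataframe, write_dataframe

    # write a shapefile in a legacy code page
    df = gp.GeoDataFrame(
        {"中文": ["中文"], "geometry": [shapely.Point(0, 0)]}, crs="EPSG:4326"
    )
    write_dataframe(df, "test.shp", encoding="CP936")
    actual = read_dataframe("test.shp")  # decoded via the .cpg file GDAL wrote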
+@pytest.mark.requires_arrow_write_api
+def test_write_kml_file_coordinate_order(tmp_path, use_arrow):
+    # confirm KML coordinates are written in lon, lat order even if CRS axis specifies otherwise
+    points = [Point(10, 20), Point(30, 40), Point(50, 60)]
+    gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
+    output_path = tmp_path / "test.kml"
+    write_dataframe(
+        gdf, output_path, layer="tmp_layer", driver="KML", use_arrow=use_arrow
+    )
+
+    gdf_in = read_dataframe(output_path, use_arrow=use_arrow)
+
+    assert np.array_equal(gdf_in.geometry.values, points)
+
+    if "LIBKML" in list_drivers():
+        # test appending to the existing file only if LIBKML is available
+        # as it appears to fall back on LIBKML driver when appending.
+        points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
+        gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
+
+        write_dataframe(
+            gdf_append,
+            output_path,
+            layer="tmp_layer",
+            driver="KML",
+            use_arrow=use_arrow,
+            append=True,
+        )
+        # force_2d used to only compare xy geometry as z-dimension is undesirably
+        # introduced when the kml file is over-written.
+        gdf_in_appended = read_dataframe(
+            output_path, use_arrow=use_arrow, force_2d=True
+        )
+
+        assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)
+
+
+@pytest.mark.requires_arrow_write_api
+def test_write_geojson_rfc7946_coordinates(tmp_path, use_arrow):
+    points = [Point(10, 20), Point(30, 40), Point(50, 60)]
+    gdf = gp.GeoDataFrame(geometry=points, crs="EPSG:4326")
+    output_path = tmp_path / "test.geojson"
+    write_dataframe(
+        gdf,
+        output_path,
+        layer="tmp_layer",
+        driver="GeoJSON",
+        RFC7946=True,
+        use_arrow=use_arrow,
+    )
+
+    gdf_in = read_dataframe(output_path, use_arrow=use_arrow)
+
+    assert np.array_equal(gdf_in.geometry.values, points)
+
+    # test appending to the existing file
+
+    points_append = [Point(70, 80), Point(90, 100), Point(110, 120)]
+    gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326")
+
+    write_dataframe(
+        gdf_append,
+        output_path,
+        layer="tmp_layer",
+        driver="GeoJSON",
+        RFC7946=True,
+        use_arrow=use_arrow,
+        append=True,
+    )
+
+    gdf_in_appended = read_dataframe(output_path, use_arrow=use_arrow)
+    assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)