pyogrio-0.7.1-cp310-cp310-manylinux_2_28_aarch64.whl → pyogrio-0.8.0-cp310-cp310-manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyogrio/__init__.py +4 -0
- pyogrio/_compat.py +7 -1
- pyogrio/_err.cpython-310-aarch64-linux-gnu.so +0 -0
- pyogrio/_err.pyx +7 -3
- pyogrio/_geometry.cpython-310-aarch64-linux-gnu.so +0 -0
- pyogrio/_io.cpython-310-aarch64-linux-gnu.so +0 -0
- pyogrio/_io.pyx +900 -242
- pyogrio/_ogr.cpython-310-aarch64-linux-gnu.so +0 -0
- pyogrio/_ogr.pxd +65 -12
- pyogrio/_ogr.pyx +8 -24
- pyogrio/_version.py +3 -3
- pyogrio/_vsi.cpython-310-aarch64-linux-gnu.so +0 -0
- pyogrio/_vsi.pxd +4 -0
- pyogrio/_vsi.pyx +140 -0
- pyogrio/core.py +43 -44
- pyogrio/gdal_data/GDAL-targets-release.cmake +3 -3
- pyogrio/gdal_data/GDAL-targets.cmake +10 -6
- pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
- pyogrio/gdal_data/gdalinfo_output.schema.json +2 -0
- pyogrio/gdal_data/gdalvrt.xsd +163 -0
- pyogrio/gdal_data/ogrinfo_output.schema.json +12 -1
- pyogrio/gdal_data/vcpkg.spdx.json +25 -25
- pyogrio/gdal_data/vcpkg_abi_info.txt +27 -26
- pyogrio/geopandas.py +131 -30
- pyogrio/proj_data/ITRF2008 +2 -2
- pyogrio/proj_data/proj-config-version.cmake +2 -2
- pyogrio/proj_data/proj-config.cmake +2 -1
- pyogrio/proj_data/proj-targets.cmake +13 -13
- pyogrio/proj_data/proj.db +0 -0
- pyogrio/proj_data/proj4-targets.cmake +13 -13
- pyogrio/proj_data/vcpkg.spdx.json +20 -42
- pyogrio/proj_data/vcpkg_abi_info.txt +14 -15
- pyogrio/raw.py +438 -116
- pyogrio/tests/conftest.py +75 -6
- pyogrio/tests/test_arrow.py +841 -7
- pyogrio/tests/test_core.py +99 -7
- pyogrio/tests/test_geopandas_io.py +744 -119
- pyogrio/tests/test_path.py +22 -3
- pyogrio/tests/test_raw_io.py +276 -50
- pyogrio/util.py +41 -19
- {pyogrio-0.7.1.dist-info → pyogrio-0.8.0.dist-info}/METADATA +3 -2
- {pyogrio-0.7.1.dist-info → pyogrio-0.8.0.dist-info}/RECORD +211 -209
- {pyogrio-0.7.1.dist-info → pyogrio-0.8.0.dist-info}/WHEEL +1 -1
- pyogrio.libs/{libgdal-d9f9f680.so.33.3.7.2 → libgdal-b2fb2022.so.34.3.8.5} +0 -0
- pyogrio/tests/win32.py +0 -86
- {pyogrio-0.7.1.dist-info → pyogrio-0.8.0.dist-info}/LICENSE +0 -0
- {pyogrio-0.7.1.dist-info → pyogrio-0.8.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
from datetime import datetime
|
|
3
|
-
import
|
|
3
|
+
from io import BytesIO
|
|
4
|
+
import locale
|
|
5
|
+
|
|
4
6
|
import numpy as np
|
|
5
7
|
import pytest
|
|
6
8
|
|
|
@@ -14,9 +16,11 @@ from pyogrio.raw import (
|
|
|
14
16
|
from pyogrio.tests.conftest import (
|
|
15
17
|
ALL_EXTS,
|
|
16
18
|
DRIVERS,
|
|
17
|
-
|
|
19
|
+
requires_pyarrow_api,
|
|
20
|
+
requires_arrow_write_api,
|
|
18
21
|
requires_gdal_geos,
|
|
19
22
|
)
|
|
23
|
+
from pyogrio._compat import PANDAS_GE_15, HAS_ARROW_WRITE_API
|
|
20
24
|
|
|
21
25
|
try:
|
|
22
26
|
import pandas as pd
|
|
@@ -44,13 +48,30 @@ pytest.importorskip("geopandas")
|
|
|
44
48
|
scope="session",
|
|
45
49
|
params=[
|
|
46
50
|
False,
|
|
47
|
-
pytest.param(True, marks=
|
|
51
|
+
pytest.param(True, marks=requires_pyarrow_api),
|
|
48
52
|
],
|
|
49
53
|
)
|
|
50
54
|
def use_arrow(request):
|
|
51
55
|
return request.param
|
|
52
56
|
|
|
53
57
|
|
|
58
|
+
@pytest.fixture(autouse=True)
|
|
59
|
+
def skip_if_no_arrow_write_api(request):
|
|
60
|
+
# automatically skip tests with use_arrow=True and that require Arrow write
|
|
61
|
+
# API (marked with `@pytest.mark.requires_arrow_write_api`) if it is not available
|
|
62
|
+
use_arrow = (
|
|
63
|
+
request.getfixturevalue("use_arrow")
|
|
64
|
+
if "use_arrow" in request.fixturenames
|
|
65
|
+
else False
|
|
66
|
+
)
|
|
67
|
+
if (
|
|
68
|
+
use_arrow
|
|
69
|
+
and not HAS_ARROW_WRITE_API
|
|
70
|
+
and request.node.get_closest_marker("requires_arrow_write_api")
|
|
71
|
+
):
|
|
72
|
+
pytest.skip("GDAL>=3.8 required for Arrow write API")
|
|
73
|
+
|
|
74
|
+
|
|
54
75
|
def spatialite_available(path):
|
|
55
76
|
try:
|
|
56
77
|
_ = read_dataframe(
|
|
@@ -61,6 +82,45 @@ def spatialite_available(path):
|
|
|
61
82
|
return False
|
|
62
83
|
|
|
63
84
|
|
|
85
|
+
@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
|
|
86
|
+
def test_read_csv_encoding(tmp_path, encoding):
|
|
87
|
+
# Write csv test file. Depending on the os this will be written in a different
|
|
88
|
+
# encoding: for linux and macos this is utf-8, for windows it is cp1252.
|
|
89
|
+
csv_path = tmp_path / "test.csv"
|
|
90
|
+
with open(csv_path, "w", encoding=encoding) as csv:
|
|
91
|
+
csv.write("näme,city\n")
|
|
92
|
+
csv.write("Wilhelm Röntgen,Zürich\n")
|
|
93
|
+
|
|
94
|
+
# Read csv. The data should be read with the same default encoding as the csv file
|
|
95
|
+
# was written in, but should have been converted to utf-8 in the dataframe returned.
|
|
96
|
+
# Hence, the asserts below, with strings in utf-8, be OK.
|
|
97
|
+
df = read_dataframe(csv_path, encoding=encoding)
|
|
98
|
+
|
|
99
|
+
assert len(df) == 1
|
|
100
|
+
assert df.columns.tolist() == ["näme", "city"]
|
|
101
|
+
assert df.city.tolist() == ["Zürich"]
|
|
102
|
+
assert df.näme.tolist() == ["Wilhelm Röntgen"]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@pytest.mark.skipif(
|
|
106
|
+
locale.getpreferredencoding().upper() == "UTF-8",
|
|
107
|
+
reason="test requires non-UTF-8 default platform",
|
|
108
|
+
)
|
|
109
|
+
def test_read_csv_platform_encoding(tmp_path):
|
|
110
|
+
"""verify that read defaults to platform encoding; only works on Windows (CP1252)"""
|
|
111
|
+
csv_path = tmp_path / "test.csv"
|
|
112
|
+
with open(csv_path, "w", encoding=locale.getpreferredencoding()) as csv:
|
|
113
|
+
csv.write("näme,city\n")
|
|
114
|
+
csv.write("Wilhelm Röntgen,Zürich\n")
|
|
115
|
+
|
|
116
|
+
df = read_dataframe(csv_path)
|
|
117
|
+
|
|
118
|
+
assert len(df) == 1
|
|
119
|
+
assert df.columns.tolist() == ["näme", "city"]
|
|
120
|
+
assert df.city.tolist() == ["Zürich"]
|
|
121
|
+
assert df.näme.tolist() == ["Wilhelm Röntgen"]
|
|
122
|
+
|
|
123
|
+
|
|
64
124
|
def test_read_dataframe(naturalearth_lowres_all_ext):
|
|
65
125
|
df = read_dataframe(naturalearth_lowres_all_ext)
|
|
66
126
|
|
|
@@ -76,8 +136,8 @@ def test_read_dataframe(naturalearth_lowres_all_ext):
|
|
|
76
136
|
]
|
|
77
137
|
|
|
78
138
|
|
|
79
|
-
def test_read_dataframe_vsi(naturalearth_lowres_vsi):
|
|
80
|
-
df = read_dataframe(naturalearth_lowres_vsi[1])
|
|
139
|
+
def test_read_dataframe_vsi(naturalearth_lowres_vsi, use_arrow):
|
|
140
|
+
df = read_dataframe(naturalearth_lowres_vsi[1], use_arrow=use_arrow)
|
|
81
141
|
assert len(df) == 177
|
|
82
142
|
|
|
83
143
|
|
|
@@ -153,6 +213,7 @@ def test_read_force_2d(test_fgdb_vsi, use_arrow):
|
|
|
153
213
|
|
|
154
214
|
|
|
155
215
|
@pytest.mark.filterwarnings("ignore: Measured")
|
|
216
|
+
@pytest.mark.filterwarnings("ignore: More than one layer found in")
|
|
156
217
|
def test_read_layer(test_fgdb_vsi, use_arrow):
|
|
157
218
|
layers = list_layers(test_fgdb_vsi)
|
|
158
219
|
kwargs = {"use_arrow": use_arrow, "read_geometry": False, "max_features": 1}
|
|
@@ -185,8 +246,13 @@ def test_read_datetime(test_fgdb_vsi, use_arrow):
|
|
|
185
246
|
assert df.SURVEY_DAT.dtype.name == "datetime64[ns]"
|
|
186
247
|
|
|
187
248
|
|
|
188
|
-
|
|
249
|
+
@pytest.mark.filterwarnings("ignore: Non-conformant content for record 1 in column ")
|
|
250
|
+
@pytest.mark.requires_arrow_write_api
|
|
251
|
+
def test_read_datetime_tz(test_datetime_tz, tmp_path, use_arrow):
|
|
189
252
|
df = read_dataframe(test_datetime_tz)
|
|
253
|
+
# Make the index non-consecutive to test this case as well. Added for issue
|
|
254
|
+
# https://github.com/geopandas/pyogrio/issues/324
|
|
255
|
+
df = df.set_index(np.array([0, 2]))
|
|
190
256
|
raw_expected = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00"]
|
|
191
257
|
|
|
192
258
|
if PANDAS_GE_20:
|
|
@@ -194,15 +260,22 @@ def test_read_datetime_tz(test_datetime_tz, tmp_path):
|
|
|
194
260
|
else:
|
|
195
261
|
expected = pd.to_datetime(raw_expected)
|
|
196
262
|
expected = pd.Series(expected, name="datetime_col")
|
|
197
|
-
assert_series_equal(df.datetime_col, expected)
|
|
263
|
+
assert_series_equal(df.datetime_col, expected, check_index=False)
|
|
198
264
|
# test write and read round trips
|
|
199
265
|
fpath = tmp_path / "test.gpkg"
|
|
200
|
-
write_dataframe(df, fpath)
|
|
201
|
-
df_read = read_dataframe(fpath)
|
|
266
|
+
write_dataframe(df, fpath, use_arrow=use_arrow)
|
|
267
|
+
df_read = read_dataframe(fpath, use_arrow=use_arrow)
|
|
268
|
+
if use_arrow:
|
|
269
|
+
# with Arrow, the datetimes are always read as UTC
|
|
270
|
+
expected = expected.dt.tz_convert("UTC")
|
|
202
271
|
assert_series_equal(df_read.datetime_col, expected)
|
|
203
272
|
|
|
204
273
|
|
|
205
|
-
|
|
274
|
+
@pytest.mark.filterwarnings(
|
|
275
|
+
"ignore: Non-conformant content for record 1 in column dates"
|
|
276
|
+
)
|
|
277
|
+
@pytest.mark.requires_arrow_write_api
|
|
278
|
+
def test_write_datetime_mixed_offset(tmp_path, use_arrow):
|
|
206
279
|
# Australian Summer Time AEDT (GMT+11), Standard Time AEST (GMT+10)
|
|
207
280
|
dates = ["2023-01-01 11:00:01.111", "2023-06-01 10:00:01.111"]
|
|
208
281
|
naive_col = pd.Series(pd.to_datetime(dates), name="dates")
|
|
@@ -216,14 +289,18 @@ def test_write_datetime_mixed_offset(tmp_path):
|
|
|
216
289
|
crs="EPSG:4326",
|
|
217
290
|
)
|
|
218
291
|
fpath = tmp_path / "test.gpkg"
|
|
219
|
-
write_dataframe(df, fpath)
|
|
220
|
-
result = read_dataframe(fpath)
|
|
292
|
+
write_dataframe(df, fpath, use_arrow=use_arrow)
|
|
293
|
+
result = read_dataframe(fpath, use_arrow=use_arrow)
|
|
221
294
|
# GDAL tz only encodes offsets, not timezones
|
|
222
295
|
# check multiple offsets are read as utc datetime instead of string values
|
|
223
296
|
assert_series_equal(result["dates"], utc_col)
|
|
224
297
|
|
|
225
298
|
|
|
226
|
-
|
|
299
|
+
@pytest.mark.filterwarnings(
|
|
300
|
+
"ignore: Non-conformant content for record 1 in column dates"
|
|
301
|
+
)
|
|
302
|
+
@pytest.mark.requires_arrow_write_api
|
|
303
|
+
def test_read_write_datetime_tz_with_nulls(tmp_path, use_arrow):
|
|
227
304
|
dates_raw = ["2020-01-01T09:00:00.123-05:00", "2020-01-01T10:00:00-05:00", pd.NaT]
|
|
228
305
|
if PANDAS_GE_20:
|
|
229
306
|
dates = pd.to_datetime(dates_raw, format="ISO8601").as_unit("ms")
|
|
@@ -234,13 +311,18 @@ def test_read_write_datetime_tz_with_nulls(tmp_path):
|
|
|
234
311
|
crs="EPSG:4326",
|
|
235
312
|
)
|
|
236
313
|
fpath = tmp_path / "test.gpkg"
|
|
237
|
-
write_dataframe(df, fpath)
|
|
238
|
-
result = read_dataframe(fpath)
|
|
314
|
+
write_dataframe(df, fpath, use_arrow=use_arrow)
|
|
315
|
+
result = read_dataframe(fpath, use_arrow=use_arrow)
|
|
316
|
+
if use_arrow:
|
|
317
|
+
# with Arrow, the datetimes are always read as UTC
|
|
318
|
+
df["dates"] = df["dates"].dt.tz_convert("UTC")
|
|
239
319
|
assert_geodataframe_equal(df, result)
|
|
240
320
|
|
|
241
321
|
|
|
242
322
|
def test_read_null_values(test_fgdb_vsi, use_arrow):
|
|
243
|
-
df = read_dataframe(
|
|
323
|
+
df = read_dataframe(
|
|
324
|
+
test_fgdb_vsi, layer="basetable_2", use_arrow=use_arrow, read_geometry=False
|
|
325
|
+
)
|
|
244
326
|
|
|
245
327
|
# make sure that Null values are preserved
|
|
246
328
|
assert df.SEGMENT_NAME.isnull().max()
|
|
@@ -330,6 +412,21 @@ def test_read_where_invalid(request, naturalearth_lowres_all_ext, use_arrow):
|
|
|
330
412
|
)
|
|
331
413
|
|
|
332
414
|
|
|
415
|
+
def test_read_where_ignored_field(naturalearth_lowres, use_arrow):
|
|
416
|
+
# column included in where is not also included in list of columns, which means
|
|
417
|
+
# GDAL will return no features
|
|
418
|
+
# NOTE: this behavior is inconsistent across drivers so only shapefiles are
|
|
419
|
+
# tested for this
|
|
420
|
+
df = read_dataframe(
|
|
421
|
+
naturalearth_lowres,
|
|
422
|
+
where=""" "iso_a3" = 'CAN' """,
|
|
423
|
+
columns=["name"],
|
|
424
|
+
use_arrow=use_arrow,
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
assert len(df) == 0
|
|
428
|
+
|
|
429
|
+
|
|
333
430
|
@pytest.mark.parametrize("bbox", [(1,), (1, 2), (1, 2, 3)])
|
|
334
431
|
def test_read_bbox_invalid(naturalearth_lowres_all_ext, bbox, use_arrow):
|
|
335
432
|
with pytest.raises(ValueError, match="Invalid bbox"):
|
|
@@ -348,7 +445,7 @@ def test_read_bbox(naturalearth_lowres_all_ext, use_arrow, bbox, expected):
|
|
|
348
445
|
if (
|
|
349
446
|
use_arrow
|
|
350
447
|
and __gdal_version__ < (3, 8, 0)
|
|
351
|
-
and
|
|
448
|
+
and naturalearth_lowres_all_ext.suffix == ".gpkg"
|
|
352
449
|
):
|
|
353
450
|
pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")
|
|
354
451
|
|
|
@@ -437,7 +534,7 @@ def test_read_mask(
|
|
|
437
534
|
if (
|
|
438
535
|
use_arrow
|
|
439
536
|
and __gdal_version__ < (3, 8, 0)
|
|
440
|
-
and
|
|
537
|
+
and naturalearth_lowres_all_ext.suffix == ".gpkg"
|
|
441
538
|
):
|
|
442
539
|
pytest.xfail(reason="GDAL bug: https://github.com/OSGeo/gdal/issues/8347")
|
|
443
540
|
|
|
@@ -469,14 +566,45 @@ def test_read_mask_where(naturalearth_lowres_all_ext, use_arrow):
|
|
|
469
566
|
assert np.array_equal(df.iso_a3, ["CAN"])
|
|
470
567
|
|
|
471
568
|
|
|
472
|
-
|
|
569
|
+
@pytest.mark.parametrize("fids", [[1, 5, 10], np.array([1, 5, 10], dtype=np.int64)])
|
|
570
|
+
def test_read_fids(naturalearth_lowres_all_ext, fids, use_arrow):
|
|
473
571
|
# ensure keyword is properly passed through
|
|
474
|
-
|
|
475
|
-
|
|
572
|
+
df = read_dataframe(
|
|
573
|
+
naturalearth_lowres_all_ext, fids=fids, fid_as_index=True, use_arrow=use_arrow
|
|
574
|
+
)
|
|
476
575
|
assert len(df) == 3
|
|
477
576
|
assert np.array_equal(fids, df.index.values)
|
|
478
577
|
|
|
479
578
|
|
|
579
|
+
@requires_pyarrow_api
|
|
580
|
+
def test_read_fids_arrow_max_exception(naturalearth_lowres):
|
|
581
|
+
# Maximum number at time of writing is 4997 for "OGRSQL". For e.g. for SQLite based
|
|
582
|
+
# formats like Geopackage, there is no limit.
|
|
583
|
+
nb_fids = 4998
|
|
584
|
+
fids = range(nb_fids)
|
|
585
|
+
with pytest.raises(ValueError, match=f"error applying filter for {nb_fids} fids"):
|
|
586
|
+
_ = read_dataframe(naturalearth_lowres, fids=fids, use_arrow=True)
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
@requires_pyarrow_api
|
|
590
|
+
@pytest.mark.skipif(
|
|
591
|
+
__gdal_version__ >= (3, 8, 0), reason="GDAL >= 3.8.0 does not need to warn"
|
|
592
|
+
)
|
|
593
|
+
def test_read_fids_arrow_warning_old_gdal(naturalearth_lowres_all_ext):
|
|
594
|
+
# A warning should be given for old GDAL versions, except for some file formats.
|
|
595
|
+
if naturalearth_lowres_all_ext.suffix not in [".gpkg", ".geojson"]:
|
|
596
|
+
handler = pytest.warns(
|
|
597
|
+
UserWarning,
|
|
598
|
+
match="Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow",
|
|
599
|
+
)
|
|
600
|
+
else:
|
|
601
|
+
handler = contextlib.nullcontext()
|
|
602
|
+
|
|
603
|
+
with handler:
|
|
604
|
+
df = read_dataframe(naturalearth_lowres_all_ext, fids=[22], use_arrow=True)
|
|
605
|
+
assert len(df) == 1
|
|
606
|
+
|
|
607
|
+
|
|
480
608
|
def test_read_fids_force_2d(test_fgdb_vsi):
|
|
481
609
|
with pytest.warns(
|
|
482
610
|
UserWarning, match=r"Measured \(M\) geometry types are not supported"
|
|
@@ -572,13 +700,17 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
|
572
700
|
# The geometry column cannot be specified when using the
|
|
573
701
|
# default OGRSQL dialect but is returned nonetheless, so 4 columns.
|
|
574
702
|
sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
|
|
575
|
-
df = read_dataframe(
|
|
703
|
+
df = read_dataframe(
|
|
704
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
705
|
+
)
|
|
576
706
|
assert len(df.columns) == 4
|
|
577
707
|
assert len(df) == 177
|
|
578
708
|
|
|
579
709
|
# Should return single row
|
|
580
710
|
sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
|
|
581
|
-
df = read_dataframe(
|
|
711
|
+
df = read_dataframe(
|
|
712
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
713
|
+
)
|
|
582
714
|
assert len(df) == 1
|
|
583
715
|
assert len(df.columns) == 6
|
|
584
716
|
assert df.iloc[0].iso_a3 == "CAN"
|
|
@@ -586,7 +718,9 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
|
586
718
|
sql = """SELECT *
|
|
587
719
|
FROM naturalearth_lowres
|
|
588
720
|
WHERE iso_a3 IN ('CAN', 'USA', 'MEX')"""
|
|
589
|
-
df = read_dataframe(
|
|
721
|
+
df = read_dataframe(
|
|
722
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
723
|
+
)
|
|
590
724
|
assert len(df.columns) == 6
|
|
591
725
|
assert len(df) == 3
|
|
592
726
|
assert df.iso_a3.tolist() == ["CAN", "USA", "MEX"]
|
|
@@ -595,7 +729,9 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
|
595
729
|
FROM naturalearth_lowres
|
|
596
730
|
WHERE iso_a3 IN ('CAN', 'USA', 'MEX')
|
|
597
731
|
ORDER BY name"""
|
|
598
|
-
df = read_dataframe(
|
|
732
|
+
df = read_dataframe(
|
|
733
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
734
|
+
)
|
|
599
735
|
assert len(df.columns) == 6
|
|
600
736
|
assert len(df) == 3
|
|
601
737
|
assert df.iso_a3.tolist() == ["CAN", "MEX", "USA"]
|
|
@@ -604,7 +740,9 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
|
604
740
|
sql = """SELECT *
|
|
605
741
|
FROM naturalearth_lowres
|
|
606
742
|
WHERE POP_EST >= 10000000 AND POP_EST < 100000000"""
|
|
607
|
-
df = read_dataframe(
|
|
743
|
+
df = read_dataframe(
|
|
744
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
745
|
+
)
|
|
608
746
|
assert len(df) == 75
|
|
609
747
|
assert len(df.columns) == 6
|
|
610
748
|
assert df.pop_est.min() >= 10000000
|
|
@@ -612,25 +750,36 @@ def test_read_sql(naturalearth_lowres_all_ext, use_arrow):
|
|
|
612
750
|
|
|
613
751
|
# Should match no items.
|
|
614
752
|
sql = "SELECT * FROM naturalearth_lowres WHERE ISO_A3 = 'INVALID'"
|
|
615
|
-
df = read_dataframe(
|
|
753
|
+
df = read_dataframe(
|
|
754
|
+
naturalearth_lowres_all_ext, sql=sql, sql_dialect="OGRSQL", use_arrow=use_arrow
|
|
755
|
+
)
|
|
616
756
|
assert len(df) == 0
|
|
617
757
|
|
|
618
758
|
|
|
619
|
-
def test_read_sql_invalid(naturalearth_lowres_all_ext):
|
|
759
|
+
def test_read_sql_invalid(naturalearth_lowres_all_ext, use_arrow):
|
|
620
760
|
if naturalearth_lowres_all_ext.suffix == ".gpkg":
|
|
621
761
|
with pytest.raises(Exception, match="In ExecuteSQL().*"):
|
|
622
|
-
read_dataframe(
|
|
762
|
+
read_dataframe(
|
|
763
|
+
naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
|
|
764
|
+
)
|
|
623
765
|
else:
|
|
624
766
|
with pytest.raises(Exception, match="SQL Expression Parsing Error"):
|
|
625
|
-
read_dataframe(
|
|
767
|
+
read_dataframe(
|
|
768
|
+
naturalearth_lowres_all_ext, sql="invalid", use_arrow=use_arrow
|
|
769
|
+
)
|
|
626
770
|
|
|
627
771
|
with pytest.raises(
|
|
628
772
|
ValueError, match="'sql' paramater cannot be combined with 'layer'"
|
|
629
773
|
):
|
|
630
|
-
read_dataframe(
|
|
774
|
+
read_dataframe(
|
|
775
|
+
naturalearth_lowres_all_ext,
|
|
776
|
+
sql="whatever",
|
|
777
|
+
layer="invalid",
|
|
778
|
+
use_arrow=use_arrow,
|
|
779
|
+
)
|
|
631
780
|
|
|
632
781
|
|
|
633
|
-
def test_read_sql_columns_where(naturalearth_lowres_all_ext):
|
|
782
|
+
def test_read_sql_columns_where(naturalearth_lowres_all_ext, use_arrow):
|
|
634
783
|
sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
|
|
635
784
|
df = read_dataframe(
|
|
636
785
|
naturalearth_lowres_all_ext,
|
|
@@ -638,13 +787,14 @@ def test_read_sql_columns_where(naturalearth_lowres_all_ext):
|
|
|
638
787
|
sql_dialect="OGRSQL",
|
|
639
788
|
columns=["iso_a3_renamed", "name"],
|
|
640
789
|
where="iso_a3_renamed IN ('CAN', 'USA', 'MEX')",
|
|
790
|
+
use_arrow=use_arrow,
|
|
641
791
|
)
|
|
642
792
|
assert len(df.columns) == 3
|
|
643
793
|
assert len(df) == 3
|
|
644
794
|
assert df.iso_a3_renamed.tolist() == ["CAN", "USA", "MEX"]
|
|
645
795
|
|
|
646
796
|
|
|
647
|
-
def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext):
|
|
797
|
+
def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext, use_arrow):
|
|
648
798
|
sql = "SELECT iso_a3 AS iso_a3_renamed, name, pop_est FROM naturalearth_lowres"
|
|
649
799
|
df = read_dataframe(
|
|
650
800
|
naturalearth_lowres_all_ext,
|
|
@@ -653,13 +803,14 @@ def test_read_sql_columns_where_bbox(naturalearth_lowres_all_ext):
|
|
|
653
803
|
columns=["iso_a3_renamed", "name"],
|
|
654
804
|
where="iso_a3_renamed IN ('CRI', 'PAN')",
|
|
655
805
|
bbox=(-85, 8, -80, 10),
|
|
806
|
+
use_arrow=use_arrow,
|
|
656
807
|
)
|
|
657
808
|
assert len(df.columns) == 3
|
|
658
809
|
assert len(df) == 2
|
|
659
810
|
assert df.iso_a3_renamed.tolist() == ["PAN", "CRI"]
|
|
660
811
|
|
|
661
812
|
|
|
662
|
-
def test_read_sql_skip_max(naturalearth_lowres_all_ext):
|
|
813
|
+
def test_read_sql_skip_max(naturalearth_lowres_all_ext, use_arrow):
|
|
663
814
|
sql = """SELECT *
|
|
664
815
|
FROM naturalearth_lowres
|
|
665
816
|
WHERE iso_a3 IN ('CAN', 'MEX', 'USA')
|
|
@@ -670,6 +821,7 @@ def test_read_sql_skip_max(naturalearth_lowres_all_ext):
|
|
|
670
821
|
skip_features=1,
|
|
671
822
|
max_features=1,
|
|
672
823
|
sql_dialect="OGRSQL",
|
|
824
|
+
use_arrow=use_arrow,
|
|
673
825
|
)
|
|
674
826
|
assert len(df.columns) == 6
|
|
675
827
|
assert len(df) == 1
|
|
@@ -677,13 +829,21 @@ def test_read_sql_skip_max(naturalearth_lowres_all_ext):
|
|
|
677
829
|
|
|
678
830
|
sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
|
|
679
831
|
df = read_dataframe(
|
|
680
|
-
naturalearth_lowres_all_ext,
|
|
832
|
+
naturalearth_lowres_all_ext,
|
|
833
|
+
sql=sql,
|
|
834
|
+
max_features=3,
|
|
835
|
+
sql_dialect="OGRSQL",
|
|
836
|
+
use_arrow=use_arrow,
|
|
681
837
|
)
|
|
682
838
|
assert len(df) == 1
|
|
683
839
|
|
|
684
840
|
sql = "SELECT * FROM naturalearth_lowres LIMIT 1"
|
|
685
841
|
df = read_dataframe(
|
|
686
|
-
naturalearth_lowres_all_ext,
|
|
842
|
+
naturalearth_lowres_all_ext,
|
|
843
|
+
sql=sql,
|
|
844
|
+
sql_dialect="OGRSQL",
|
|
845
|
+
skip_features=1,
|
|
846
|
+
use_arrow=use_arrow,
|
|
687
847
|
)
|
|
688
848
|
assert len(df) == 0
|
|
689
849
|
|
|
@@ -694,10 +854,12 @@ def test_read_sql_skip_max(naturalearth_lowres_all_ext):
|
|
|
694
854
|
[ext for ext in ALL_EXTS if ext != ".gpkg"],
|
|
695
855
|
indirect=["naturalearth_lowres"],
|
|
696
856
|
)
|
|
697
|
-
def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres):
|
|
857
|
+
def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres, use_arrow):
|
|
698
858
|
# Should return singular item
|
|
699
859
|
sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
|
|
700
|
-
df = read_dataframe(
|
|
860
|
+
df = read_dataframe(
|
|
861
|
+
naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
|
|
862
|
+
)
|
|
701
863
|
assert len(df) == 1
|
|
702
864
|
assert len(df.columns) == 6
|
|
703
865
|
assert df.iloc[0].iso_a3 == "CAN"
|
|
@@ -707,7 +869,9 @@ def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres):
|
|
|
707
869
|
sql = """SELECT ST_Buffer(geometry, 5) AS geometry, name, pop_est, iso_a3
|
|
708
870
|
FROM naturalearth_lowres
|
|
709
871
|
WHERE ISO_A3 = 'CAN'"""
|
|
710
|
-
df = read_dataframe(
|
|
872
|
+
df = read_dataframe(
|
|
873
|
+
naturalearth_lowres, sql=sql, sql_dialect="SQLITE", use_arrow=use_arrow
|
|
874
|
+
)
|
|
711
875
|
assert len(df) == 1
|
|
712
876
|
assert len(df.columns) == 4
|
|
713
877
|
assert df.iloc[0].geometry.area > area_canada
|
|
@@ -717,12 +881,14 @@ def test_read_sql_dialect_sqlite_nogpkg(naturalearth_lowres):
|
|
|
717
881
|
@pytest.mark.parametrize(
|
|
718
882
|
"naturalearth_lowres", [".gpkg"], indirect=["naturalearth_lowres"]
|
|
719
883
|
)
|
|
720
|
-
def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres):
|
|
884
|
+
def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres, use_arrow):
|
|
721
885
|
# "INDIRECT_SQL" prohibits GDAL from passing the SQL statement to sqlite.
|
|
722
886
|
# Because the statement is processed within GDAL it is possible to use
|
|
723
887
|
# spatialite functions even if sqlite isn't built with spatialite support.
|
|
724
888
|
sql = "SELECT * FROM naturalearth_lowres WHERE iso_a3 = 'CAN'"
|
|
725
|
-
df = read_dataframe(
|
|
889
|
+
df = read_dataframe(
|
|
890
|
+
naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
|
|
891
|
+
)
|
|
726
892
|
assert len(df) == 1
|
|
727
893
|
assert len(df.columns) == 6
|
|
728
894
|
assert df.iloc[0].iso_a3 == "CAN"
|
|
@@ -732,29 +898,67 @@ def test_read_sql_dialect_sqlite_gpkg(naturalearth_lowres):
|
|
|
732
898
|
sql = """SELECT ST_Buffer(geom, 5) AS geometry, name, pop_est, iso_a3
|
|
733
899
|
FROM naturalearth_lowres
|
|
734
900
|
WHERE ISO_A3 = 'CAN'"""
|
|
735
|
-
df = read_dataframe(
|
|
901
|
+
df = read_dataframe(
|
|
902
|
+
naturalearth_lowres, sql=sql, sql_dialect="INDIRECT_SQLITE", use_arrow=use_arrow
|
|
903
|
+
)
|
|
736
904
|
assert len(df) == 1
|
|
737
905
|
assert len(df.columns) == 4
|
|
738
906
|
assert df.iloc[0].geometry.area > area_canada
|
|
739
907
|
|
|
740
908
|
|
|
909
|
+
@pytest.mark.parametrize("encoding", ["utf-8", "cp1252", None])
|
|
910
|
+
def test_write_csv_encoding(tmp_path, encoding):
|
|
911
|
+
"""Test if write_dataframe uses the default encoding correctly."""
|
|
912
|
+
# Write csv test file. Depending on the os this will be written in a different
|
|
913
|
+
# encoding: for linux and macos this is utf-8, for windows it is cp1252.
|
|
914
|
+
csv_path = tmp_path / "test.csv"
|
|
915
|
+
|
|
916
|
+
with open(csv_path, "w", encoding=encoding) as csv:
|
|
917
|
+
csv.write("näme,city\n")
|
|
918
|
+
csv.write("Wilhelm Röntgen,Zürich\n")
|
|
919
|
+
|
|
920
|
+
# Write csv test file with the same data using write_dataframe. It should use the
|
|
921
|
+
# same encoding as above.
|
|
922
|
+
df = pd.DataFrame({"näme": ["Wilhelm Röntgen"], "city": ["Zürich"]})
|
|
923
|
+
csv_pyogrio_path = tmp_path / "test_pyogrio.csv"
|
|
924
|
+
write_dataframe(df, csv_pyogrio_path, encoding=encoding)
|
|
925
|
+
|
|
926
|
+
# Check if the text files written both ways can be read again and give same result.
|
|
927
|
+
with open(csv_path, "r", encoding=encoding) as csv:
|
|
928
|
+
csv_str = csv.read()
|
|
929
|
+
with open(csv_pyogrio_path, "r", encoding=encoding) as csv_pyogrio:
|
|
930
|
+
csv_pyogrio_str = csv_pyogrio.read()
|
|
931
|
+
assert csv_str == csv_pyogrio_str
|
|
932
|
+
|
|
933
|
+
# Check if they files are binary identical, to be 100% sure they were written with
|
|
934
|
+
# the same encoding.
|
|
935
|
+
with open(csv_path, "rb") as csv:
|
|
936
|
+
csv_bytes = csv.read()
|
|
937
|
+
with open(csv_pyogrio_path, "rb") as csv_pyogrio:
|
|
938
|
+
csv_pyogrio_bytes = csv_pyogrio.read()
|
|
939
|
+
assert csv_bytes == csv_pyogrio_bytes
|
|
940
|
+
|
|
941
|
+
|
|
741
942
|
@pytest.mark.parametrize("ext", ALL_EXTS)
|
|
742
|
-
|
|
943
|
+
@pytest.mark.requires_arrow_write_api
|
|
944
|
+
def test_write_dataframe(tmp_path, naturalearth_lowres, ext, use_arrow):
|
|
743
945
|
input_gdf = read_dataframe(naturalearth_lowres)
|
|
744
946
|
output_path = tmp_path / f"test{ext}"
|
|
745
947
|
|
|
746
948
|
if ext == ".fgb":
|
|
747
949
|
# For .fgb, spatial_index=False to avoid the rows being reordered
|
|
748
|
-
write_dataframe(
|
|
950
|
+
write_dataframe(
|
|
951
|
+
input_gdf, output_path, use_arrow=use_arrow, spatial_index=False
|
|
952
|
+
)
|
|
749
953
|
else:
|
|
750
|
-
write_dataframe(input_gdf, output_path)
|
|
954
|
+
write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
|
|
751
955
|
|
|
752
956
|
assert output_path.exists()
|
|
753
957
|
result_gdf = read_dataframe(output_path)
|
|
754
958
|
|
|
755
959
|
geometry_types = result_gdf.geometry.type.unique()
|
|
756
960
|
if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
|
|
757
|
-
assert geometry_types == ["MultiPolygon"]
|
|
961
|
+
assert list(geometry_types) == ["MultiPolygon"]
|
|
758
962
|
else:
|
|
759
963
|
assert set(geometry_types) == set(["MultiPolygon", "Polygon"])
|
|
760
964
|
|
|
@@ -775,14 +979,21 @@ def test_write_dataframe(tmp_path, naturalearth_lowres, ext):
|
|
|
775
979
|
|
|
776
980
|
|
|
777
981
|
@pytest.mark.filterwarnings("ignore:.*No SRS set on layer.*")
|
|
982
|
+
@pytest.mark.parametrize("write_geodf", [True, False])
|
|
778
983
|
@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS + [".xlsx"] if ext != ".fgb"])
|
|
779
|
-
|
|
780
|
-
|
|
984
|
+
@pytest.mark.requires_arrow_write_api
|
|
985
|
+
def test_write_dataframe_no_geom(
|
|
986
|
+
request, tmp_path, naturalearth_lowres, write_geodf, ext, use_arrow
|
|
987
|
+
):
|
|
988
|
+
"""Test writing a (geo)dataframe without a geometry column.
|
|
781
989
|
|
|
782
990
|
FlatGeobuf (.fgb) doesn't seem to support this, and just writes an empty file.
|
|
783
991
|
"""
|
|
784
992
|
# Prepare test data
|
|
785
993
|
input_df = read_dataframe(naturalearth_lowres, read_geometry=False)
|
|
994
|
+
if write_geodf:
|
|
995
|
+
input_df = gp.GeoDataFrame(input_df)
|
|
996
|
+
|
|
786
997
|
output_path = tmp_path / f"test{ext}"
|
|
787
998
|
|
|
788
999
|
# A shapefile without geometry column results in only a .dbf file.
|
|
@@ -792,7 +1003,7 @@ def test_write_dataframe_no_geom(tmp_path, naturalearth_lowres, ext):
|
|
|
792
1003
|
# Determine driver
|
|
793
1004
|
driver = DRIVERS[ext] if ext != ".xlsx" else "XLSX"
|
|
794
1005
|
|
|
795
|
-
write_dataframe(input_df, output_path, driver=driver)
|
|
1006
|
+
write_dataframe(input_df, output_path, use_arrow=use_arrow, driver=driver)
|
|
796
1007
|
|
|
797
1008
|
assert output_path.exists()
|
|
798
1009
|
result_df = read_dataframe(output_path)
|
|
@@ -805,6 +1016,9 @@ def test_write_dataframe_no_geom(tmp_path, naturalearth_lowres, ext):
|
|
|
805
1016
|
if ext in [".gpkg", ".shp", ".xlsx"]:
|
|
806
1017
|
# These file types return a DataFrame when read.
|
|
807
1018
|
assert not isinstance(result_df, gp.GeoDataFrame)
|
|
1019
|
+
if isinstance(input_df, gp.GeoDataFrame):
|
|
1020
|
+
input_df = pd.DataFrame(input_df)
|
|
1021
|
+
|
|
808
1022
|
pd.testing.assert_frame_equal(
|
|
809
1023
|
result_df, input_df, check_index_type=False, check_dtype=check_dtype
|
|
810
1024
|
)
|
|
@@ -821,12 +1035,27 @@ def test_write_dataframe_no_geom(tmp_path, naturalearth_lowres, ext):
|
|
|
821
1035
|
)
|
|
822
1036
|
|
|
823
1037
|
|
|
1038
|
+
@pytest.mark.requires_arrow_write_api
|
|
1039
|
+
def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):
|
|
1040
|
+
# dataframe writing ignores the index
|
|
1041
|
+
input_gdf = read_dataframe(naturalearth_lowres)
|
|
1042
|
+
input_gdf = input_gdf.set_index("iso_a3")
|
|
1043
|
+
|
|
1044
|
+
output_path = tmp_path / "test.shp"
|
|
1045
|
+
write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
|
|
1046
|
+
|
|
1047
|
+
result_gdf = read_dataframe(output_path)
|
|
1048
|
+
assert isinstance(result_gdf.index, pd.RangeIndex)
|
|
1049
|
+
assert_geodataframe_equal(result_gdf, input_gdf.reset_index(drop=True))
|
|
1050
|
+
|
|
1051
|
+
|
|
824
1052
|
@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
|
|
825
|
-
|
|
1053
|
+
@pytest.mark.requires_arrow_write_api
|
|
1054
|
+
def test_write_empty_dataframe(tmp_path, ext, use_arrow):
|
|
826
1055
|
expected = gp.GeoDataFrame(geometry=[], crs=4326)
|
|
827
1056
|
|
|
828
1057
|
filename = tmp_path / f"test{ext}"
|
|
829
|
-
write_dataframe(expected, filename)
|
|
1058
|
+
write_dataframe(expected, filename, use_arrow=use_arrow)
|
|
830
1059
|
|
|
831
1060
|
assert filename.exists()
|
|
832
1061
|
df = read_dataframe(filename)
|
|
@@ -834,83 +1063,119 @@ def test_write_empty_dataframe(tmp_path, ext):
|
|
|
834
1063
|
|
|
835
1064
|
|
|
836
1065
|
@pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
|
|
837
|
-
|
|
1066
|
+
@pytest.mark.requires_arrow_write_api
|
|
1067
|
+
def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):
|
|
838
1068
|
# Writing empty dataframe to .geojsons or .geojsonl results logically in a 0 byte
|
|
839
1069
|
# file, but gdal isn't able to read those again at the time of writing.
|
|
840
1070
|
# Issue logged here: https://github.com/geopandas/pyogrio/issues/94
|
|
841
1071
|
expected = gp.GeoDataFrame(geometry=[], crs=4326)
|
|
842
1072
|
|
|
843
1073
|
filename = tmp_path / f"test{ext}"
|
|
844
|
-
write_dataframe(expected, filename)
|
|
1074
|
+
write_dataframe(expected, filename, use_arrow=use_arrow)
|
|
845
1075
|
|
|
846
1076
|
assert filename.exists()
|
|
847
1077
|
with pytest.raises(
|
|
848
|
-
Exception, match=".* not recognized as a supported file format."
|
|
1078
|
+
Exception, match=".* not recognized as( being in)? a supported file format."
|
|
849
1079
|
):
|
|
850
|
-
_ = read_dataframe(filename)
|
|
1080
|
+
_ = read_dataframe(filename, use_arrow=use_arrow)
|
|
851
1081
|
|
|
852
1082
|
|
|
853
|
-
|
|
1083
|
+
@pytest.mark.requires_arrow_write_api
|
|
1084
|
+
def test_write_dataframe_gpkg_multiple_layers(tmp_path, naturalearth_lowres, use_arrow):
|
|
854
1085
|
input_gdf = read_dataframe(naturalearth_lowres)
|
|
855
|
-
|
|
1086
|
+
filename = tmp_path / "test.gpkg"
|
|
856
1087
|
|
|
857
|
-
write_dataframe(
|
|
1088
|
+
write_dataframe(
|
|
1089
|
+
input_gdf,
|
|
1090
|
+
filename,
|
|
1091
|
+
layer="first",
|
|
1092
|
+
promote_to_multi=True,
|
|
1093
|
+
use_arrow=use_arrow,
|
|
1094
|
+
)
|
|
858
1095
|
|
|
859
|
-
assert
|
|
860
|
-
assert np.array_equal(list_layers(
|
|
1096
|
+
assert filename.exists()
|
|
1097
|
+
assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])
|
|
861
1098
|
|
|
862
|
-
write_dataframe(
|
|
1099
|
+
write_dataframe(
|
|
1100
|
+
input_gdf,
|
|
1101
|
+
filename,
|
|
1102
|
+
layer="second",
|
|
1103
|
+
promote_to_multi=True,
|
|
1104
|
+
use_arrow=use_arrow,
|
|
1105
|
+
)
|
|
863
1106
|
assert np.array_equal(
|
|
864
|
-
list_layers(
|
|
1107
|
+
list_layers(filename),
|
|
865
1108
|
[["first", "MultiPolygon"], ["second", "MultiPolygon"]],
|
|
866
1109
|
)
|
|
867
1110
|
|
|
868
1111
|
|
|
869
1112
|
@pytest.mark.parametrize("ext", ALL_EXTS)
|
|
870
|
-
|
|
1113
|
+
@pytest.mark.requires_arrow_write_api
|
|
1114
|
+
def test_write_dataframe_append(request, tmp_path, naturalearth_lowres, ext, use_arrow):
|
|
871
1115
|
if ext == ".fgb" and __gdal_version__ <= (3, 5, 0):
|
|
872
1116
|
pytest.skip("Append to FlatGeobuf fails for GDAL <= 3.5.0")
|
|
873
1117
|
|
|
874
1118
|
if ext in (".geojsonl", ".geojsons") and __gdal_version__ <= (3, 6, 0):
|
|
875
1119
|
pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")
|
|
876
1120
|
|
|
1121
|
+
if use_arrow and ext.startswith(".geojson"):
|
|
1122
|
+
# Bug in GDAL when appending int64 to GeoJSON
|
|
1123
|
+
# (https://github.com/OSGeo/gdal/issues/9792)
|
|
1124
|
+
request.node.add_marker(
|
|
1125
|
+
pytest.mark.xfail(reason="Bugs with append when writing Arrow to GeoJSON")
|
|
1126
|
+
)
|
|
1127
|
+
|
|
877
1128
|
input_gdf = read_dataframe(naturalearth_lowres)
|
|
878
|
-
|
|
1129
|
+
filename = tmp_path / f"test{ext}"
|
|
879
1130
|
|
|
880
|
-
write_dataframe(input_gdf,
|
|
1131
|
+
write_dataframe(input_gdf, filename, use_arrow=use_arrow)
|
|
881
1132
|
|
|
882
|
-
|
|
883
|
-
assert len(read_dataframe(
|
|
1133
|
+
filename.exists()
|
|
1134
|
+
assert len(read_dataframe(filename)) == 177
|
|
884
1135
|
|
|
885
|
-
write_dataframe(input_gdf,
|
|
886
|
-
assert len(read_dataframe(
|
|
1136
|
+
write_dataframe(input_gdf, filename, use_arrow=use_arrow, append=True)
|
|
1137
|
+
assert len(read_dataframe(filename)) == 354
|
|
887
1138
|
|
|
888
1139
|
|
|
889
1140
|
@pytest.mark.parametrize("spatial_index", [False, True])
|
|
890
|
-
|
|
1141
|
+
@pytest.mark.requires_arrow_write_api
|
|
1142
|
+
def test_write_dataframe_gdal_options(
|
|
1143
|
+
tmp_path, naturalearth_lowres, spatial_index, use_arrow
|
|
1144
|
+
):
|
|
891
1145
|
df = read_dataframe(naturalearth_lowres)
|
|
892
1146
|
|
|
893
1147
|
outfilename1 = tmp_path / "test1.shp"
|
|
894
|
-
write_dataframe(
|
|
1148
|
+
write_dataframe(
|
|
1149
|
+
df,
|
|
1150
|
+
outfilename1,
|
|
1151
|
+
use_arrow=use_arrow,
|
|
1152
|
+
SPATIAL_INDEX="YES" if spatial_index else "NO",
|
|
1153
|
+
)
|
|
895
1154
|
assert outfilename1.exists() is True
|
|
896
1155
|
index_filename1 = tmp_path / "test1.qix"
|
|
897
1156
|
assert index_filename1.exists() is spatial_index
|
|
898
1157
|
|
|
899
1158
|
# using explicit layer_options instead
|
|
900
1159
|
outfilename2 = tmp_path / "test2.shp"
|
|
901
|
-
write_dataframe(
|
|
1160
|
+
write_dataframe(
|
|
1161
|
+
df,
|
|
1162
|
+
outfilename2,
|
|
1163
|
+
use_arrow=use_arrow,
|
|
1164
|
+
layer_options=dict(spatial_index=spatial_index),
|
|
1165
|
+
)
|
|
902
1166
|
assert outfilename2.exists() is True
|
|
903
1167
|
index_filename2 = tmp_path / "test2.qix"
|
|
904
1168
|
assert index_filename2.exists() is spatial_index
|
|
905
1169
|
|
|
906
1170
|
|
|
907
|
-
|
|
1171
|
+
@pytest.mark.requires_arrow_write_api
|
|
1172
|
+
def test_write_dataframe_gdal_options_unknown(tmp_path, naturalearth_lowres, use_arrow):
|
|
908
1173
|
df = read_dataframe(naturalearth_lowres)
|
|
909
1174
|
|
|
910
1175
|
# geojson has no spatial index, so passing keyword should raise
|
|
911
1176
|
outfilename = tmp_path / "test.geojson"
|
|
912
1177
|
with pytest.raises(ValueError, match="unrecognized option 'SPATIAL_INDEX'"):
|
|
913
|
-
write_dataframe(df, outfilename, spatial_index=True)
|
|
1178
|
+
write_dataframe(df, outfilename, use_arrow=use_arrow, spatial_index=True)
|
|
914
1179
|
|
|
915
1180
|
|
|
916
1181
|
def _get_gpkg_table_names(path):
|
|
@@ -923,21 +1188,25 @@ def _get_gpkg_table_names(path):
|
|
|
923
1188
|
return [res[0] for res in result]
|
|
924
1189
|
|
|
925
1190
|
|
|
926
|
-
|
|
1191
|
+
@pytest.mark.requires_arrow_write_api
|
|
1192
|
+
def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres, use_arrow):
|
|
927
1193
|
df = read_dataframe(naturalearth_lowres)
|
|
928
1194
|
|
|
929
1195
|
test_default_filename = tmp_path / "test_default.gpkg"
|
|
930
|
-
write_dataframe(df, test_default_filename)
|
|
1196
|
+
write_dataframe(df, test_default_filename, use_arrow=use_arrow)
|
|
931
1197
|
assert "gpkg_ogr_contents" in _get_gpkg_table_names(test_default_filename)
|
|
932
1198
|
|
|
933
1199
|
test_no_contents_filename = tmp_path / "test_no_contents.gpkg"
|
|
934
|
-
write_dataframe(
|
|
1200
|
+
write_dataframe(
|
|
1201
|
+
df, test_default_filename, use_arrow=use_arrow, ADD_GPKG_OGR_CONTENTS="NO"
|
|
1202
|
+
)
|
|
935
1203
|
assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename)
|
|
936
1204
|
|
|
937
1205
|
test_no_contents_filename2 = tmp_path / "test_no_contents2.gpkg"
|
|
938
1206
|
write_dataframe(
|
|
939
1207
|
df,
|
|
940
1208
|
test_no_contents_filename2,
|
|
1209
|
+
use_arrow=use_arrow,
|
|
941
1210
|
dataset_options=dict(add_gpkg_ogr_contents=False),
|
|
942
1211
|
)
|
|
943
1212
|
assert "gpkg_ogr_contents" not in _get_gpkg_table_names(test_no_contents_filename2)
|
|
@@ -954,6 +1223,7 @@ def test_write_dataframe_gdal_options_dataset(tmp_path, naturalearth_lowres):
|
|
|
954
1223
|
(".geojson", False, ["MultiPolygon", "Polygon"], "Unknown"),
|
|
955
1224
|
],
|
|
956
1225
|
)
|
|
1226
|
+
@pytest.mark.requires_arrow_write_api
|
|
957
1227
|
def test_write_dataframe_promote_to_multi(
|
|
958
1228
|
tmp_path,
|
|
959
1229
|
naturalearth_lowres,
|
|
@@ -961,11 +1231,14 @@ def test_write_dataframe_promote_to_multi(
|
|
|
961
1231
|
promote_to_multi,
|
|
962
1232
|
expected_geometry_types,
|
|
963
1233
|
expected_geometry_type,
|
|
1234
|
+
use_arrow,
|
|
964
1235
|
):
|
|
965
1236
|
input_gdf = read_dataframe(naturalearth_lowres)
|
|
966
1237
|
|
|
967
1238
|
output_path = tmp_path / f"test_promote{ext}"
|
|
968
|
-
write_dataframe(
|
|
1239
|
+
write_dataframe(
|
|
1240
|
+
input_gdf, output_path, use_arrow=use_arrow, promote_to_multi=promote_to_multi
|
|
1241
|
+
)
|
|
969
1242
|
|
|
970
1243
|
assert output_path.exists()
|
|
971
1244
|
output_gdf = read_dataframe(output_path)
|
|
@@ -998,6 +1271,7 @@ def test_write_dataframe_promote_to_multi(
|
|
|
998
1271
|
(".shp", True, "Unknown", ["MultiPolygon", "Polygon"], "Polygon"),
|
|
999
1272
|
],
|
|
1000
1273
|
)
|
|
1274
|
+
@pytest.mark.requires_arrow_write_api
|
|
1001
1275
|
def test_write_dataframe_promote_to_multi_layer_geom_type(
|
|
1002
1276
|
tmp_path,
|
|
1003
1277
|
naturalearth_lowres,
|
|
@@ -1006,6 +1280,7 @@ def test_write_dataframe_promote_to_multi_layer_geom_type(
|
|
|
1006
1280
|
geometry_type,
|
|
1007
1281
|
expected_geometry_types,
|
|
1008
1282
|
expected_geometry_type,
|
|
1283
|
+
use_arrow,
|
|
1009
1284
|
):
|
|
1010
1285
|
input_gdf = read_dataframe(naturalearth_lowres)
|
|
1011
1286
|
|
|
@@ -1022,6 +1297,7 @@ def test_write_dataframe_promote_to_multi_layer_geom_type(
|
|
|
1022
1297
|
write_dataframe(
|
|
1023
1298
|
input_gdf,
|
|
1024
1299
|
output_path,
|
|
1300
|
+
use_arrow=use_arrow,
|
|
1025
1301
|
promote_to_multi=promote_to_multi,
|
|
1026
1302
|
geometry_type=geometry_type,
|
|
1027
1303
|
)
|
|
@@ -1040,9 +1316,15 @@ def test_write_dataframe_promote_to_multi_layer_geom_type(
|
|
|
1040
1316
|
(".fgb", False, "Polygon", "Mismatched geometry type"),
|
|
1041
1317
|
(".fgb", None, "Point", "Mismatched geometry type"),
|
|
1042
1318
|
(".fgb", None, "Polygon", "Mismatched geometry type"),
|
|
1043
|
-
(
|
|
1319
|
+
(
|
|
1320
|
+
".shp",
|
|
1321
|
+
None,
|
|
1322
|
+
"Point",
|
|
1323
|
+
"Could not add feature to layer at index|Error while writing batch to OGR layer",
|
|
1324
|
+
),
|
|
1044
1325
|
],
|
|
1045
1326
|
)
|
|
1327
|
+
@pytest.mark.requires_arrow_write_api
|
|
1046
1328
|
def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
|
|
1047
1329
|
tmp_path,
|
|
1048
1330
|
naturalearth_lowres,
|
|
@@ -1050,31 +1332,37 @@ def test_write_dataframe_promote_to_multi_layer_geom_type_invalid(
|
|
|
1050
1332
|
promote_to_multi,
|
|
1051
1333
|
geometry_type,
|
|
1052
1334
|
expected_raises_match,
|
|
1335
|
+
use_arrow,
|
|
1053
1336
|
):
|
|
1054
1337
|
input_gdf = read_dataframe(naturalearth_lowres)
|
|
1055
1338
|
|
|
1056
1339
|
output_path = tmp_path / f"test{ext}"
|
|
1057
|
-
with pytest.raises(FeatureError, match=expected_raises_match):
|
|
1340
|
+
with pytest.raises((FeatureError, DataLayerError), match=expected_raises_match):
|
|
1058
1341
|
write_dataframe(
|
|
1059
1342
|
input_gdf,
|
|
1060
1343
|
output_path,
|
|
1344
|
+
use_arrow=use_arrow,
|
|
1061
1345
|
promote_to_multi=promote_to_multi,
|
|
1062
1346
|
geometry_type=geometry_type,
|
|
1063
1347
|
)
|
|
1064
1348
|
|
|
1065
1349
|
|
|
1066
|
-
|
|
1350
|
+
@pytest.mark.requires_arrow_write_api
|
|
1351
|
+
def test_write_dataframe_layer_geom_type_invalid(
|
|
1352
|
+
tmp_path, naturalearth_lowres, use_arrow
|
|
1353
|
+
):
|
|
1067
1354
|
df = read_dataframe(naturalearth_lowres)
|
|
1068
1355
|
|
|
1069
1356
|
filename = tmp_path / "test.geojson"
|
|
1070
1357
|
with pytest.raises(
|
|
1071
1358
|
GeometryError, match="Geometry type is not supported: NotSupported"
|
|
1072
1359
|
):
|
|
1073
|
-
write_dataframe(df, filename, geometry_type="NotSupported")
|
|
1360
|
+
write_dataframe(df, filename, use_arrow=use_arrow, geometry_type="NotSupported")
|
|
1074
1361
|
|
|
1075
1362
|
|
|
1076
1363
|
@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".shp"])
|
|
1077
|
-
|
|
1364
|
+
@pytest.mark.requires_arrow_write_api
|
|
1365
|
+
def test_write_dataframe_truly_mixed(tmp_path, ext, use_arrow):
|
|
1078
1366
|
geometry = [
|
|
1079
1367
|
shapely.Point(0, 0),
|
|
1080
1368
|
shapely.LineString([(0, 0), (1, 1)]),
|
|
@@ -1094,9 +1382,9 @@ def test_write_dataframe_truly_mixed(tmp_path, ext):
|
|
|
1094
1382
|
|
|
1095
1383
|
if ext == ".fgb":
|
|
1096
1384
|
# For .fgb, spatial_index=False to avoid the rows being reordered
|
|
1097
|
-
write_dataframe(df, filename, spatial_index=False)
|
|
1385
|
+
write_dataframe(df, filename, use_arrow=use_arrow, spatial_index=False)
|
|
1098
1386
|
else:
|
|
1099
|
-
write_dataframe(df, filename)
|
|
1387
|
+
write_dataframe(df, filename, use_arrow=use_arrow)
|
|
1100
1388
|
|
|
1101
1389
|
# Drivers that support mixed geometries will default to "Unknown" geometry type
|
|
1102
1390
|
assert read_info(filename)["geometry_type"] == "Unknown"
|
|
@@ -1104,7 +1392,8 @@ def test_write_dataframe_truly_mixed(tmp_path, ext):
|
|
|
1104
1392
|
assert_geodataframe_equal(result, df, check_geom_type=True)
|
|
1105
1393
|
|
|
1106
1394
|
|
|
1107
|
-
|
|
1395
|
+
@pytest.mark.requires_arrow_write_api
|
|
1396
|
+
def test_write_dataframe_truly_mixed_invalid(tmp_path, use_arrow):
|
|
1108
1397
|
# Shapefile doesn't support generic "Geometry" / "Unknown" type
|
|
1109
1398
|
# for mixed geometries
|
|
1110
1399
|
|
|
@@ -1122,9 +1411,12 @@ def test_write_dataframe_truly_mixed_invalid(tmp_path):
|
|
|
1122
1411
|
msg = (
|
|
1123
1412
|
"Could not add feature to layer at index 1: Attempt to "
|
|
1124
1413
|
r"write non-point \(LINESTRING\) geometry to point shapefile."
|
|
1414
|
+
# DataLayerError when using Arrow
|
|
1415
|
+
"|Error while writing batch to OGR layer: Attempt to "
|
|
1416
|
+
r"write non-point \(LINESTRING\) geometry to point shapefile."
|
|
1125
1417
|
)
|
|
1126
|
-
with pytest.raises(FeatureError, match=msg):
|
|
1127
|
-
write_dataframe(df, tmp_path / "test.shp")
|
|
1418
|
+
with pytest.raises((FeatureError, DataLayerError), match=msg):
|
|
1419
|
+
write_dataframe(df, tmp_path / "test.shp", use_arrow=use_arrow)
|
|
1128
1420
|
|
|
1129
1421
|
|
|
1130
1422
|
@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".fgb"])
|
|
@@ -1137,11 +1429,12 @@ def test_write_dataframe_truly_mixed_invalid(tmp_path):
|
|
|
1137
1429
|
[None, None],
|
|
1138
1430
|
],
|
|
1139
1431
|
)
|
|
1140
|
-
|
|
1432
|
+
@pytest.mark.requires_arrow_write_api
|
|
1433
|
+
def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext, use_arrow):
|
|
1141
1434
|
filename = tmp_path / f"test{ext}"
|
|
1142
1435
|
|
|
1143
1436
|
df = gp.GeoDataFrame({"col": [1.0, 2.0]}, geometry=geoms, crs="EPSG:4326")
|
|
1144
|
-
write_dataframe(df, filename)
|
|
1437
|
+
write_dataframe(df, filename, use_arrow=use_arrow)
|
|
1145
1438
|
result = read_dataframe(filename)
|
|
1146
1439
|
assert_geodataframe_equal(result, df)
|
|
1147
1440
|
|
|
@@ -1149,16 +1442,19 @@ def test_write_dataframe_infer_geometry_with_nulls(tmp_path, geoms, ext):
|
|
|
1149
1442
|
@pytest.mark.filterwarnings(
|
|
1150
1443
|
"ignore: You will likely lose important projection information"
|
|
1151
1444
|
)
|
|
1152
|
-
|
|
1445
|
+
@pytest.mark.requires_arrow_write_api
|
|
1446
|
+
def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
|
|
1153
1447
|
df = read_dataframe(naturalearth_lowres_all_ext)
|
|
1154
1448
|
# project Belgium to a custom Albers Equal Area projection
|
|
1155
|
-
expected =
|
|
1156
|
-
|
|
1449
|
+
expected = (
|
|
1450
|
+
df.loc[df.name == "Belgium"]
|
|
1451
|
+
.reset_index(drop=True)
|
|
1452
|
+
.to_crs("+proj=aea +lat_1=49.5 +lat_2=51.5 +lon_0=4.3")
|
|
1157
1453
|
)
|
|
1158
|
-
filename =
|
|
1159
|
-
write_dataframe(expected, filename)
|
|
1454
|
+
filename = tmp_path / "test.shp"
|
|
1455
|
+
write_dataframe(expected, filename, use_arrow=use_arrow)
|
|
1160
1456
|
|
|
1161
|
-
assert
|
|
1457
|
+
assert filename.exists()
|
|
1162
1458
|
|
|
1163
1459
|
df = read_dataframe(filename)
|
|
1164
1460
|
|
|
@@ -1170,6 +1466,7 @@ def test_custom_crs_io(tmpdir, naturalearth_lowres_all_ext):
|
|
|
1170
1466
|
|
|
1171
1467
|
|
|
1172
1468
|
def test_write_read_mixed_column_values(tmp_path):
|
|
1469
|
+
# use_arrow=True is tested separately below
|
|
1173
1470
|
mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
|
|
1174
1471
|
geoms = [shapely.Point(0, 0) for _ in mixed_values]
|
|
1175
1472
|
test_gdf = gp.GeoDataFrame(
|
|
@@ -1186,7 +1483,21 @@ def test_write_read_mixed_column_values(tmp_path):
|
|
|
1186
1483
|
assert output_gdf["mixed"][idx] == str(value)
|
|
1187
1484
|
|
|
1188
1485
|
|
|
1189
|
-
|
|
1486
|
+
@requires_arrow_write_api
|
|
1487
|
+
def test_write_read_mixed_column_values_arrow(tmp_path):
|
|
1488
|
+
# Arrow cannot represent a column of mixed types
|
|
1489
|
+
mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]
|
|
1490
|
+
geoms = [shapely.Point(0, 0) for _ in mixed_values]
|
|
1491
|
+
test_gdf = gp.GeoDataFrame(
|
|
1492
|
+
{"geometry": geoms, "mixed": mixed_values}, crs="epsg:31370"
|
|
1493
|
+
)
|
|
1494
|
+
output_path = tmp_path / "test_write_mixed_column.gpkg"
|
|
1495
|
+
with pytest.raises(TypeError, match=".*Conversion failed for column"):
|
|
1496
|
+
write_dataframe(test_gdf, output_path, use_arrow=True)
|
|
1497
|
+
|
|
1498
|
+
|
|
1499
|
+
@pytest.mark.requires_arrow_write_api
|
|
1500
|
+
def test_write_read_null(tmp_path, use_arrow):
|
|
1190
1501
|
output_path = tmp_path / "test_write_nan.gpkg"
|
|
1191
1502
|
geom = shapely.Point(0, 0)
|
|
1192
1503
|
test_data = {
|
|
@@ -1195,7 +1506,7 @@ def test_write_read_null(tmp_path):
|
|
|
1195
1506
|
"object_str": ["test", None, np.nan],
|
|
1196
1507
|
}
|
|
1197
1508
|
test_gdf = gp.GeoDataFrame(test_data, crs="epsg:31370")
|
|
1198
|
-
write_dataframe(test_gdf, output_path)
|
|
1509
|
+
write_dataframe(test_gdf, output_path, use_arrow=use_arrow)
|
|
1199
1510
|
result_gdf = read_dataframe(output_path)
|
|
1200
1511
|
assert len(test_gdf) == len(result_gdf)
|
|
1201
1512
|
assert result_gdf["float64"][0] == 1.0
|
|
@@ -1218,7 +1529,7 @@ def test_write_read_null(tmp_path):
|
|
|
1218
1529
|
["2.5D MultiLineString", "MultiLineString Z"],
|
|
1219
1530
|
),
|
|
1220
1531
|
(
|
|
1221
|
-
"MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))",
|
|
1532
|
+
"MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))",
|
|
1222
1533
|
["2.5D MultiPolygon", "MultiPolygon Z"],
|
|
1223
1534
|
),
|
|
1224
1535
|
(
|
|
@@ -1227,11 +1538,12 @@ def test_write_read_null(tmp_path):
|
|
|
1227
1538
|
),
|
|
1228
1539
|
],
|
|
1229
1540
|
)
|
|
1230
|
-
|
|
1541
|
+
@pytest.mark.requires_arrow_write_api
|
|
1542
|
+
def test_write_geometry_z_types(tmp_path, wkt, geom_types, use_arrow):
|
|
1231
1543
|
filename = tmp_path / "test.fgb"
|
|
1232
1544
|
gdf = gp.GeoDataFrame(geometry=from_wkt([wkt]), crs="EPSG:4326")
|
|
1233
1545
|
for geom_type in geom_types:
|
|
1234
|
-
write_dataframe(gdf, filename, geometry_type=geom_type)
|
|
1546
|
+
write_dataframe(gdf, filename, use_arrow=use_arrow, geometry_type=geom_type)
|
|
1235
1547
|
df = read_dataframe(filename)
|
|
1236
1548
|
assert_geodataframe_equal(df, gdf)
|
|
1237
1549
|
|
|
@@ -1260,7 +1572,7 @@ def test_write_geometry_z_types(tmp_path, wkt, geom_types):
|
|
|
1260
1572
|
"MultiPolygon Z",
|
|
1261
1573
|
False,
|
|
1262
1574
|
[
|
|
1263
|
-
"MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))"
|
|
1575
|
+
"MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))"
|
|
1264
1576
|
],
|
|
1265
1577
|
),
|
|
1266
1578
|
(
|
|
@@ -1285,8 +1597,9 @@ def test_write_geometry_z_types(tmp_path, wkt, geom_types):
|
|
|
1285
1597
|
),
|
|
1286
1598
|
],
|
|
1287
1599
|
)
|
|
1600
|
+
@pytest.mark.requires_arrow_write_api
|
|
1288
1601
|
def test_write_geometry_z_types_auto(
|
|
1289
|
-
tmp_path, ext, test_descr, exp_geometry_type, mixed_dimensions, wkt
|
|
1602
|
+
tmp_path, ext, test_descr, exp_geometry_type, mixed_dimensions, wkt, use_arrow
|
|
1290
1603
|
):
|
|
1291
1604
|
# Shapefile has some different behaviour that other file types
|
|
1292
1605
|
if ext == ".shp":
|
|
@@ -1313,10 +1626,10 @@ def test_write_geometry_z_types_auto(
|
|
|
1313
1626
|
DataSourceError,
|
|
1314
1627
|
match=("Mixed 2D and 3D coordinates are not supported by"),
|
|
1315
1628
|
):
|
|
1316
|
-
write_dataframe(gdf, filename)
|
|
1629
|
+
write_dataframe(gdf, filename, use_arrow=use_arrow)
|
|
1317
1630
|
return
|
|
1318
1631
|
else:
|
|
1319
|
-
write_dataframe(gdf, filename)
|
|
1632
|
+
write_dataframe(gdf, filename, use_arrow=use_arrow)
|
|
1320
1633
|
|
|
1321
1634
|
info = read_info(filename)
|
|
1322
1635
|
assert info["geometry_type"] == exp_geometry_type
|
|
@@ -1328,11 +1641,17 @@ def test_write_geometry_z_types_auto(
|
|
|
1328
1641
|
assert_geodataframe_equal(gdf, result_gdf)
|
|
1329
1642
|
|
|
1330
1643
|
|
|
1331
|
-
def test_read_multisurface(data_dir):
|
|
1332
|
-
|
|
1644
|
+
def test_read_multisurface(data_dir, use_arrow):
|
|
1645
|
+
if use_arrow:
|
|
1646
|
+
with pytest.raises(shapely.errors.GEOSException):
|
|
1647
|
+
# TODO(Arrow)
|
|
1648
|
+
# shapely fails parsing the WKB
|
|
1649
|
+
read_dataframe(data_dir / "test_multisurface.gpkg", use_arrow=True)
|
|
1650
|
+
else:
|
|
1651
|
+
df = read_dataframe(data_dir / "test_multisurface.gpkg")
|
|
1333
1652
|
|
|
1334
|
-
|
|
1335
|
-
|
|
1653
|
+
# MultiSurface should be converted to MultiPolygon
|
|
1654
|
+
assert df.geometry.type.tolist() == ["MultiPolygon"]
|
|
1336
1655
|
|
|
1337
1656
|
|
|
1338
1657
|
def test_read_dataset_kwargs(data_dir, use_arrow):
|
|
@@ -1371,7 +1690,8 @@ def test_read_invalid_dataset_kwargs(naturalearth_lowres, use_arrow):
|
|
|
1371
1690
|
read_dataframe(naturalearth_lowres, use_arrow=use_arrow, INVALID="YES")
|
|
1372
1691
|
|
|
1373
1692
|
|
|
1374
|
-
|
|
1693
|
+
@pytest.mark.requires_arrow_write_api
|
|
1694
|
+
def test_write_nullable_dtypes(tmp_path, use_arrow):
|
|
1375
1695
|
path = tmp_path / "test_nullable_dtypes.gpkg"
|
|
1376
1696
|
test_data = {
|
|
1377
1697
|
"col1": pd.Series([1, 2, 3], dtype="int64"),
|
|
@@ -1383,7 +1703,7 @@ def test_write_nullable_dtypes(tmp_path):
|
|
|
1383
1703
|
input_gdf = gp.GeoDataFrame(
|
|
1384
1704
|
test_data, geometry=[shapely.Point(0, 0)] * 3, crs="epsg:31370"
|
|
1385
1705
|
)
|
|
1386
|
-
write_dataframe(input_gdf, path)
|
|
1706
|
+
write_dataframe(input_gdf, path, use_arrow=use_arrow)
|
|
1387
1707
|
output_gdf = read_dataframe(path)
|
|
1388
1708
|
# We read it back as default (non-nullable) numpy dtypes, so we cast
|
|
1389
1709
|
# to those for the expected result
|
|
@@ -1392,19 +1712,21 @@ def test_write_nullable_dtypes(tmp_path):
|
|
|
1392
1712
|
expected["col3"] = expected["col3"].astype("float32")
|
|
1393
1713
|
expected["col4"] = expected["col4"].astype("float64")
|
|
1394
1714
|
expected["col5"] = expected["col5"].astype(object)
|
|
1715
|
+
expected.loc[1, "col5"] = None # pandas converts to pd.NA on line above
|
|
1395
1716
|
assert_geodataframe_equal(output_gdf, expected)
|
|
1396
1717
|
|
|
1397
1718
|
|
|
1398
1719
|
@pytest.mark.parametrize(
|
|
1399
1720
|
"metadata_type", ["dataset_metadata", "layer_metadata", "metadata"]
|
|
1400
1721
|
)
|
|
1401
|
-
|
|
1722
|
+
@pytest.mark.requires_arrow_write_api
|
|
1723
|
+
def test_metadata_io(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
|
|
1402
1724
|
metadata = {"level": metadata_type}
|
|
1403
1725
|
|
|
1404
1726
|
df = read_dataframe(naturalearth_lowres)
|
|
1405
1727
|
|
|
1406
|
-
filename =
|
|
1407
|
-
write_dataframe(df, filename, **{metadata_type: metadata})
|
|
1728
|
+
filename = tmp_path / "test.gpkg"
|
|
1729
|
+
write_dataframe(df, filename, use_arrow=use_arrow, **{metadata_type: metadata})
|
|
1408
1730
|
|
|
1409
1731
|
metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type
|
|
1410
1732
|
|
|
@@ -1420,25 +1742,328 @@ def test_metadata_io(tmpdir, naturalearth_lowres, metadata_type):
         {"key": 1},
     ],
 )
-def test_invalid_metadata(tmpdir, naturalearth_lowres, metadata_type, metadata):
+@pytest.mark.requires_arrow_write_api
+def test_invalid_metadata(
+    tmp_path, naturalearth_lowres, metadata_type, metadata, use_arrow
+):
+    df = read_dataframe(naturalearth_lowres)
     with pytest.raises(ValueError, match="must be a string"):
-        filename = os.path.join(str(tmpdir), "test.gpkg")
         write_dataframe(
-
+            df, tmp_path / "test.gpkg", use_arrow=use_arrow, **{metadata_type: metadata}
         )
 
 
 @pytest.mark.parametrize("metadata_type", ["dataset_metadata", "layer_metadata"])
-def test_metadata_unsupported(tmpdir, naturalearth_lowres, metadata_type):
+@pytest.mark.requires_arrow_write_api
+def test_metadata_unsupported(tmp_path, naturalearth_lowres, metadata_type, use_arrow):
     """metadata is silently ignored"""
 
-    filename =
+    filename = tmp_path / "test.geojson"
     write_dataframe(
         read_dataframe(naturalearth_lowres),
         filename,
+        use_arrow=use_arrow,
         **{metadata_type: {"key": "value"}},
     )
 
     metadata_key = "layer_metadata" if metadata_type == "metadata" else metadata_type
 
     assert read_info(filename)[metadata_key] is None
+
+
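Taken together, these tests pin down the metadata contract: metadata is an alias for layer metadata, GPKG persists both kinds, values must be strings, and drivers without metadata support (here GeoJSON) silently drop them. A condensed sketch of the round trip ("input.gpkg" and "out.gpkg" are placeholder paths):

from pyogrio import read_dataframe, read_info, write_dataframe

df = read_dataframe("input.gpkg")
write_dataframe(df, "out.gpkg", layer_metadata={"level": "layer"})
print(read_info("out.gpkg")["layer_metadata"])  # {'level': 'layer'}

# non-string values are rejected before anything is written:
# write_dataframe(df, "out.gpkg", layer_metadata={"level": 1})  # ValueError
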
+@pytest.mark.skipif(not PANDAS_GE_15, reason="ArrowDtype requires pandas 1.5+")
+def test_read_dataframe_arrow_dtypes(tmp_path):
+    # https://github.com/geopandas/pyogrio/issues/319 - ensure arrow binary
+    # column can be converted with from_wkb in case of missing values
+    pytest.importorskip("pyarrow")
+    filename = tmp_path / "test.gpkg"
+    df = gp.GeoDataFrame(
+        {"col": [1.0, 2.0]}, geometry=[Point(1, 1), None], crs="EPSG:4326"
+    )
+    write_dataframe(df, filename)
+
+    result = read_dataframe(
+        filename,
+        use_arrow=True,
+        arrow_to_pandas_kwargs={
+            "types_mapper": lambda pa_dtype: pd.ArrowDtype(pa_dtype)
+        },
+    )
+    assert isinstance(result["col"].dtype, pd.ArrowDtype)
+    result["col"] = result["col"].astype("float64")
+    assert_geodataframe_equal(result, df)
+
+
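The arrow_to_pandas_kwargs dict is forwarded to pyarrow.Table.to_pandas, so a types_mapper keeps non-geometry columns Arrow-backed instead of converting them to numpy. The lambda in the test can be shortened, since pd.ArrowDtype is itself a callable from pyarrow type to pandas dtype ("data.gpkg" is a placeholder path):

import pandas as pd
from pyogrio import read_dataframe

gdf = read_dataframe(
    "data.gpkg",
    use_arrow=True,
    arrow_to_pandas_kwargs={"types_mapper": pd.ArrowDtype},
)
print(gdf.dtypes)  # e.g. double[pyarrow] for "col"; the geometry column is unchanged
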
+@requires_pyarrow_api
+@pytest.mark.skipif(
+    __gdal_version__ < (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
+)
+@pytest.mark.parametrize("ext", ALL_EXTS)
+def test_arrow_bool_roundtrip(tmp_path, ext):
+    filename = tmp_path / f"test{ext}"
+
+    kwargs = {}
+
+    if ext == ".fgb":
+        # For .fgb, spatial_index=False to avoid the rows being reordered
+        kwargs["spatial_index"] = False
+
+    df = gp.GeoDataFrame(
+        {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
+        crs="EPSG:4326",
+    )
+
+    write_dataframe(df, filename, **kwargs)
+    result = read_dataframe(filename, use_arrow=True)
+    # Shapefiles do not support bool columns; these are returned as int32
+    assert_geodataframe_equal(result, df, check_dtype=ext != ".shp")
+
+
+@requires_pyarrow_api
+@pytest.mark.skipif(
+    __gdal_version__ >= (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
+)
+@pytest.mark.parametrize("ext", ALL_EXTS)
+def test_arrow_bool_exception(tmp_path, ext):
+    filename = tmp_path / f"test{ext}"
+
+    df = gp.GeoDataFrame(
+        {"bool_col": [True, False, True, False, True], "geometry": [Point(0, 0)] * 5},
+        crs="EPSG:4326",
+    )
+
+    write_dataframe(df, filename)
+
+    if ext in {".fgb", ".gpkg"}:
+        # only raise exception for GPKG / FGB
+        with pytest.raises(
+            RuntimeError,
+            match="GDAL < 3.8.3 does not correctly read boolean data values using "
+            "the Arrow API",
+        ):
+            read_dataframe(filename, use_arrow=True)
+
+        # do not raise exception if no bool columns are read
+        read_dataframe(filename, use_arrow=True, columns=[])
+
+    else:
+        _ = read_dataframe(filename, use_arrow=True)
+
+
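These two tests are complementary skipif halves of one behavior: GDAL before 3.8.3 returned incorrect boolean values through the Arrow stream for GPKG and FlatGeobuf, so pyogrio raises rather than return bad data. A sketch of the guard a caller running on an unknown GDAL version could apply:

from pyogrio import __gdal_version__, read_dataframe

def read_with_bools(path):
    # route boolean-bearing layers through the classic I/O path on old GDAL
    return read_dataframe(path, use_arrow=__gdal_version__ >= (3, 8, 3))
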
+@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
+@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
+def test_write_memory(naturalearth_lowres, driver):
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO()
+    write_dataframe(df, buffer, driver=driver, layer="test")
+
+    assert len(buffer.getbuffer()) > 0
+
+    actual = read_dataframe(buffer)
+    assert len(actual) == len(df)
+
+    is_json = driver == "GeoJSON"
+
+    assert_geodataframe_equal(
+        actual,
+        df,
+        check_less_precise=is_json,
+        check_index_type=False,
+        check_dtype=not is_json,
+    )
+
+
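write_dataframe can now target an in-memory buffer (backed by GDAL's /vsimem): the buffer must be empty, the driver must be stated explicitly because there is no file extension to infer it from, and the same buffer can be read back directly. A condensed sketch ("input.gpkg" is a placeholder path):

from io import BytesIO
from pyogrio import read_dataframe, write_dataframe

df = read_dataframe("input.gpkg")

buffer = BytesIO()
write_dataframe(df, buffer, driver="GPKG", layer="test")  # driver= is mandatory
roundtripped = read_dataframe(buffer)

The tests that follow pin down the failure modes: driver=None, Shapefile/OpenFileGDB targets, append=True, and a non-empty buffer all raise.
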
+def test_write_memory_driver_required(naturalearth_lowres):
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO()
+
+    with pytest.raises(
+        ValueError,
+        match="driver must be provided to write to in-memory file",
+    ):
+        write_dataframe(df.head(1), buffer, driver=None, layer="test")
+
+
+@pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
+def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
+    if driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
+        pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")
+
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO()
+
+    with pytest.raises(
+        ValueError, match=f"writing to in-memory file is not supported for {driver}"
+    ):
+        write_dataframe(df, buffer, driver=driver, layer="test")
+
+
+@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
+def test_write_memory_append_unsupported(naturalearth_lowres, driver):
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO()
+
+    with pytest.raises(
+        NotImplementedError, match="append is not supported for in-memory files"
+    ):
+        write_dataframe(df.head(1), buffer, driver=driver, layer="test", append=True)
+
+
+def test_write_memory_existing_unsupported(naturalearth_lowres):
+    df = read_dataframe(naturalearth_lowres)
+
+    buffer = BytesIO(b"0000")
+    with pytest.raises(
+        NotImplementedError,
+        match="writing to existing in-memory object is not supported",
+    ):
+        write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test")
+
+
+@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
+def test_non_utf8_encoding_io(tmp_path, ext, encoded_text):
+    """Verify that we write non-UTF-8 data to the data source
+
+    IMPORTANT: this may not be valid for the data source and will likely render
+    it unusable in other tools, but it should successfully roundtrip unless we
+    disable writing using other encodings.
+
+    NOTE: the FlatGeobuf driver cannot handle non-UTF-8 data in GDAL >= 3.9
+
+    NOTE: pyarrow cannot handle non-UTF-8 characters in this way
+    """
+
+    encoding, text = encoded_text
+    output_path = tmp_path / f"test.{ext}"
+
+    df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
+    write_dataframe(df, output_path, encoding=encoding)
+
+    # cannot open these files without specifying encoding
+    with pytest.raises(UnicodeDecodeError):
+        read_dataframe(output_path)
+
+    # must provide encoding to read these properly
+    actual = read_dataframe(output_path, encoding=encoding)
+    assert actual.columns[0] == text
+    assert actual[text].values[0] == text
+
+
+@requires_pyarrow_api
+@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
+def test_non_utf8_encoding_io_arrow_exception(tmp_path, ext, encoded_text):
+    encoding, text = encoded_text
+    output_path = tmp_path / f"test.{ext}"
+
+    df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
+    write_dataframe(df, output_path, encoding=encoding)
+
+    # cannot open these files without specifying encoding
+    with pytest.raises(UnicodeDecodeError):
+        read_dataframe(output_path)
+
+    with pytest.raises(
+        ValueError, match="non-UTF-8 encoding is not supported for Arrow"
+    ):
+        read_dataframe(output_path, encoding=encoding, use_arrow=True)
+
+
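The encoding contract these tests establish: a file written with a non-UTF-8 encoding (which may violate the format's own spec, as the docstring warns) round-trips only when the same encoding is passed on read, and the Arrow path rejects non-UTF-8 encodings outright. A sketch, assuming CP932-encodable Japanese text and a placeholder path:

import geopandas as gp
from shapely.geometry import Point
from pyogrio import read_dataframe, write_dataframe

df = gp.GeoDataFrame({"名前": ["値"]}, geometry=[Point(0, 0)], crs="EPSG:4326")
write_dataframe(df, "cp932.gpkg", encoding="CP932")

back = read_dataframe("cp932.gpkg", encoding="CP932")  # a plain read raises
# read_dataframe("cp932.gpkg", encoding="CP932", use_arrow=True)  # ValueError
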
+def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text, use_arrow):
+    encoding, text = encoded_text
+
+    output_path = tmp_path / "test.shp"
+
+    df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
+    write_dataframe(df, output_path, encoding=encoding)
+
+    # NOTE: GDAL automatically creates a cpg file with the encoding name, which
+    # means that if we read this without specifying the encoding it uses the
+    # correct one
+    actual = read_dataframe(output_path, use_arrow=use_arrow)
+    assert actual.columns[0] == text
+    assert actual[text].values[0] == text
+
+    # verify that if cpg file is not present, the user-provided encoding must be used
+    output_path.with_suffix(".cpg").unlink()
+
+    # We will assume ISO-8859-1, which is wrong
+    miscoded = text.encode(encoding).decode("ISO-8859-1")
+
+    if use_arrow:
+        # pyarrow cannot decode column name with incorrect encoding
+        with pytest.raises(UnicodeDecodeError):
+            read_dataframe(output_path, use_arrow=True)
+    else:
+        bad = read_dataframe(output_path, use_arrow=False)
+        assert bad.columns[0] == miscoded
+        assert bad[miscoded].values[0] == miscoded
+
+    # If encoding is provided, that should yield correct text
+    actual = read_dataframe(output_path, encoding=encoding, use_arrow=use_arrow)
+    assert actual.columns[0] == text
+    assert actual[text].values[0] == text
+
+    # if the ENCODING open option is provided, that should yield correct text
+    actual = read_dataframe(output_path, use_arrow=use_arrow, ENCODING=encoding)
+    assert actual.columns[0] == text
+    assert actual[text].values[0] == text
+
+
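Shapefiles are the exception to the explicit-encoding rule because GDAL writes a .cpg sidecar naming the code page; only once that sidecar is gone does the reader need help, either via the encoding parameter or the Shapefile driver's ENCODING open option. A sketch with a placeholder path, assuming the layer was written as CP936:

from pathlib import Path
from pyogrio import read_dataframe

shp = Path("data.shp")
shp.with_suffix(".cpg").unlink(missing_ok=True)  # simulate the missing sidecar

df = read_dataframe(shp, encoding="CP936")   # generic parameter
df = read_dataframe(shp, ENCODING="CP936")   # driver open option, equivalent
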
+def test_encoding_read_option_collision_shapefile(naturalearth_lowres, use_arrow):
+    """Providing both the encoding parameter and the ENCODING open option
+    (even if blank) is not allowed."""
+
+    with pytest.raises(
+        ValueError, match='cannot provide both encoding parameter and "ENCODING" option'
+    ):
+        read_dataframe(
+            naturalearth_lowres, encoding="CP936", ENCODING="", use_arrow=use_arrow
+        )
+
+
+def test_encoding_write_layer_option_collision_shapefile(tmp_path, encoded_text):
+    """Providing both the encoding parameter and the ENCODING layer creation
+    option (even if blank) is not allowed."""
+    encoding, text = encoded_text
+
+    output_path = tmp_path / "test.shp"
+    df = gp.GeoDataFrame({text: [text], "geometry": [Point(0, 0)]}, crs="EPSG:4326")
+
+    with pytest.raises(
+        ValueError,
+        match='cannot provide both encoding parameter and "ENCODING" layer creation option',
+    ):
+        write_dataframe(
+            df, output_path, encoding=encoding, layer_options={"ENCODING": ""}
+        )
+
+
+def test_non_utf8_encoding_shapefile_sql(tmp_path, use_arrow):
+    encoding = "CP936"
+
+    output_path = tmp_path / "test.shp"
+
+    mandarin = "中文"
+    df = gp.GeoDataFrame(
+        {mandarin: mandarin, "geometry": [Point(0, 0)]}, crs="EPSG:4326"
+    )
+    write_dataframe(df, output_path, encoding=encoding)
+
+    actual = read_dataframe(
+        output_path,
+        sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
+        use_arrow=use_arrow,
+    )
+    assert actual.columns[0] == mandarin
+    assert actual[mandarin].values[0] == mandarin
+
+    actual = read_dataframe(
+        output_path,
+        sql=f"select * from test where \"{mandarin}\" = '{mandarin}'",
+        encoding=encoding,
+        use_arrow=use_arrow,
+    )
+    assert actual.columns[0] == mandarin
+    assert actual[mandarin].values[0] == mandarin