pyogrio 0.7.2__cp310-cp310-win_amd64.whl → 0.9.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyogrio might be problematic. Click here for more details.
- pyogrio/__init__.py +7 -3
- pyogrio/_compat.py +6 -1
- pyogrio/_err.c +855 -321
- pyogrio/_err.cp310-win_amd64.pyd +0 -0
- pyogrio/_err.pyx +7 -3
- pyogrio/_geometry.c +134 -75
- pyogrio/_geometry.cp310-win_amd64.pyd +0 -0
- pyogrio/_io.c +28462 -22659
- pyogrio/_io.cp310-win_amd64.pyd +0 -0
- pyogrio/_io.pyx +904 -242
- pyogrio/_ogr.c +1317 -1640
- pyogrio/_ogr.cp310-win_amd64.pyd +0 -0
- pyogrio/_ogr.pxd +69 -13
- pyogrio/_ogr.pyx +8 -24
- pyogrio/_version.py +3 -3
- pyogrio/_vsi.c +6815 -0
- pyogrio/_vsi.cp310-win_amd64.pyd +0 -0
- pyogrio/_vsi.pxd +4 -0
- pyogrio/_vsi.pyx +140 -0
- pyogrio/core.py +43 -44
- pyogrio/gdal_data/GDAL-targets-release.cmake +1 -1
- pyogrio/gdal_data/GDAL-targets.cmake +10 -6
- pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
- pyogrio/gdal_data/gdalinfo_output.schema.json +2 -0
- pyogrio/gdal_data/gdalvrt.xsd +163 -0
- pyogrio/gdal_data/ogrinfo_output.schema.json +12 -1
- pyogrio/gdal_data/vcpkg.spdx.json +23 -23
- pyogrio/gdal_data/vcpkg_abi_info.txt +29 -28
- pyogrio/geopandas.py +140 -34
- pyogrio/proj_data/ITRF2008 +2 -2
- pyogrio/proj_data/proj-config-version.cmake +2 -2
- pyogrio/proj_data/proj-config.cmake +2 -1
- pyogrio/proj_data/proj-targets-release.cmake +0 -1
- pyogrio/proj_data/proj-targets.cmake +10 -6
- pyogrio/proj_data/proj.db +0 -0
- pyogrio/proj_data/proj4-targets-release.cmake +0 -1
- pyogrio/proj_data/proj4-targets.cmake +10 -6
- pyogrio/proj_data/vcpkg.spdx.json +21 -43
- pyogrio/proj_data/vcpkg_abi_info.txt +16 -17
- pyogrio/raw.py +438 -116
- pyogrio/tests/conftest.py +75 -6
- pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
- pyogrio/tests/test_arrow.py +841 -7
- pyogrio/tests/test_core.py +99 -7
- pyogrio/tests/test_geopandas_io.py +827 -121
- pyogrio/tests/test_path.py +23 -3
- pyogrio/tests/test_raw_io.py +276 -50
- pyogrio/util.py +39 -19
- pyogrio-0.9.0.dist-info/DELVEWHEEL +2 -0
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/METADATA +2 -2
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/RECORD +72 -67
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/WHEEL +1 -1
- pyogrio.libs/Lerc-5e4d8cbeeabca06f95e2270792304dc3.dll +0 -0
- pyogrio.libs/{gdal-c3b1d8f66682071d0cd26d86e4182013.dll → gdal-b434963605a006e01c486c0df6dea4e0.dll} +0 -0
- pyogrio.libs/geos-f0622d0794b81c937a851b2e6fa9b712.dll +0 -0
- pyogrio.libs/geos_c-0e16bf70612fc3301d077b9d863a3fdb.dll +0 -0
- pyogrio.libs/{geotiff-e43cdab688866b59f8800cfcde836d16.dll → geotiff-772e7c705fb15ddf91b432adb4eb1f6c.dll} +0 -0
- pyogrio.libs/iconv-2-8fcc23ddc6f096c45871011b6e008b44.dll +0 -0
- pyogrio.libs/{jpeg62-567ab743ac805dfb57fe3867ba5788a4.dll → jpeg62-2f9b7af22d78338e8f0be0058503dc35.dll} +0 -0
- pyogrio.libs/json-c-e52a077545e4057de42beb4948289b41.dll +0 -0
- pyogrio.libs/libcurl-bc81cd8afe15b10c0821b181b6af8bd0.dll +0 -0
- pyogrio.libs/libexpat-fbe03ca8917dfda776562d4338b289b8.dll +0 -0
- pyogrio.libs/{liblzma-de7f4770d4e3715acd031ca93883f10c.dll → liblzma-6b36f24d54d3dd45f274a2aebef81085.dll} +0 -0
- pyogrio.libs/libpng16-13928571ad910705eae8d7dd8eef8b11.dll +0 -0
- pyogrio.libs/{msvcp140-83b6a1a2fa8b1735a358b2fe13cabe4e.dll → msvcp140-46db46e967c8db2cb7a20fc75872a57e.dll} +0 -0
- pyogrio.libs/proj-8a30239ef2dfc3b9dd2bb48e8abb330f.dll +0 -0
- pyogrio.libs/{qhull_r-99ae8a526357acc44b162cb4df2c3bb6.dll → qhull_r-c45abde5d0c92faf723cc2942138af77.dll} +0 -0
- pyogrio.libs/sqlite3-df30c3cf230727e23c43c40126a530f7.dll +0 -0
- pyogrio.libs/{tiff-7c2d4b204ec2db46c81f6a597895c2f7.dll → tiff-43630f30487a9015213475ae86ed3fa3.dll} +0 -0
- pyogrio.libs/{zlib1-824de9299616f0908aeeb9441a084848.dll → zlib1-e1272810861a13dd8d6cff3beac47f17.dll} +0 -0
- pyogrio/tests/win32.py +0 -86
- pyogrio-0.7.2.dist-info/DELVEWHEEL +0 -2
- pyogrio.libs/Lerc-d5afc4101deffe7de21241ccd4d562f6.dll +0 -0
- pyogrio.libs/geos-1c764a1384537a0ad2995e83d23e8642.dll +0 -0
- pyogrio.libs/geos_c-0d7dfdcee49efa8df585e2fb993157aa.dll +0 -0
- pyogrio.libs/json-c-36c91e30c4410d41c22b2010c31183e3.dll +0 -0
- pyogrio.libs/libcurl-ebcc8c18195071a90e59f818902e10c6.dll +0 -0
- pyogrio.libs/libexpat-345379c9c11632130d8c383cbacde1a6.dll +0 -0
- pyogrio.libs/libpng16-2c30e6846653c47ef2ff9d7dec3338ba.dll +0 -0
- pyogrio.libs/proj-98758c96a6cb682b5cec7e8dc5e29a50.dll +0 -0
- pyogrio.libs/sqlite3-327ed7b38bfd91fb4a17544960e055e9.dll +0 -0
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/LICENSE +0 -0
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/top_level.txt +0 -0
pyogrio/tests/test_arrow.py
CHANGED
|
@@ -1,12 +1,32 @@
|
|
|
1
1
|
import contextlib
|
|
2
|
+
from io import BytesIO
|
|
3
|
+
import json
|
|
2
4
|
import math
|
|
3
5
|
import os
|
|
6
|
+
from packaging.version import Version
|
|
7
|
+
import sys
|
|
4
8
|
|
|
5
9
|
import pytest
|
|
10
|
+
import numpy as np
|
|
6
11
|
|
|
7
|
-
|
|
8
|
-
from pyogrio
|
|
9
|
-
|
|
12
|
+
import pyogrio
|
|
13
|
+
from pyogrio import (
|
|
14
|
+
__gdal_version__,
|
|
15
|
+
read_dataframe,
|
|
16
|
+
read_info,
|
|
17
|
+
list_layers,
|
|
18
|
+
get_gdal_config_option,
|
|
19
|
+
set_gdal_config_options,
|
|
20
|
+
)
|
|
21
|
+
from pyogrio.raw import open_arrow, read_arrow, write, write_arrow
|
|
22
|
+
from pyogrio.errors import DataSourceError, FieldError, DataLayerError
|
|
23
|
+
from pyogrio.tests.conftest import (
|
|
24
|
+
ALL_EXTS,
|
|
25
|
+
DRIVERS,
|
|
26
|
+
DRIVER_EXT,
|
|
27
|
+
requires_arrow_write_api,
|
|
28
|
+
requires_pyarrow_api,
|
|
29
|
+
)
|
|
10
30
|
|
|
11
31
|
try:
|
|
12
32
|
import pandas as pd
|
|
@@ -18,8 +38,9 @@ except ImportError:
|
|
|
18
38
|
pass
|
|
19
39
|
|
|
20
40
|
# skip all tests in this file if Arrow API or GeoPandas are unavailable
|
|
21
|
-
pytestmark =
|
|
41
|
+
pytestmark = requires_pyarrow_api
|
|
22
42
|
pytest.importorskip("geopandas")
|
|
43
|
+
pa = pytest.importorskip("pyarrow")
|
|
23
44
|
|
|
24
45
|
|
|
25
46
|
def test_read_arrow(naturalearth_lowres_all_ext):
|
|
@@ -33,6 +54,12 @@ def test_read_arrow(naturalearth_lowres_all_ext):
|
|
|
33
54
|
assert_geodataframe_equal(result, expected, check_less_precise=check_less_precise)
|
|
34
55
|
|
|
35
56
|
|
|
57
|
+
def test_read_arrow_unspecified_layer_warning(data_dir):
    """A multi-layer source read without an explicit layer must emit a warning."""
    multi_layer_path = data_dir / "sample.osm.pbf"
    with pytest.warns(UserWarning, match="More than one layer found "):
        read_arrow(multi_layer_path)
|
|
61
|
+
|
|
62
|
+
|
|
36
63
|
@pytest.mark.parametrize("skip_features, expected", [(10, 167), (200, 0)])
|
|
37
64
|
def test_read_arrow_skip_features(naturalearth_lowres, skip_features, expected):
|
|
38
65
|
table = read_arrow(naturalearth_lowres, skip_features=skip_features)[1]
|
|
@@ -115,6 +142,7 @@ def test_read_arrow_to_pandas_kwargs(test_fgdb_vsi):
|
|
|
115
142
|
arrow_to_pandas_kwargs = {"strings_to_categorical": True}
|
|
116
143
|
result = read_dataframe(
|
|
117
144
|
test_fgdb_vsi,
|
|
145
|
+
layer="basetable_2",
|
|
118
146
|
use_arrow=True,
|
|
119
147
|
arrow_to_pandas_kwargs=arrow_to_pandas_kwargs,
|
|
120
148
|
)
|
|
@@ -128,8 +156,27 @@ def test_read_arrow_raw(naturalearth_lowres):
|
|
|
128
156
|
assert isinstance(table, pyarrow.Table)
|
|
129
157
|
|
|
130
158
|
|
|
131
|
-
def
|
|
132
|
-
|
|
159
|
+
def test_read_arrow_vsi(naturalearth_lowres_vsi):
    """Read element 1 of the ``naturalearth_lowres_vsi`` fixture and check the row count."""
    source = naturalearth_lowres_vsi[1]
    _meta, table = read_arrow(source)
    assert len(table) == 177
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def test_read_arrow_bytes(geojson_bytes):
    """``read_arrow`` accepts the ``geojson_bytes`` fixture directly as its source."""
    meta, table = read_arrow(geojson_bytes)

    field_names = meta["fields"]
    assert field_names.shape == (5,)
    assert len(table) == 3
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def test_read_arrow_filelike(geojson_filelike):
    """``read_arrow`` accepts the ``geojson_filelike`` fixture directly as its source."""
    meta, table = read_arrow(geojson_filelike)

    field_names = meta["fields"]
    assert field_names.shape == (5,)
    assert len(table) == 3
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def test_open_arrow_pyarrow(naturalearth_lowres):
    """With ``use_pyarrow=True`` the yielded stream is a pyarrow RecordBatchReader."""
    with open_arrow(naturalearth_lowres, use_pyarrow=True) as opened:
        meta, reader = opened
        assert isinstance(meta, dict)
        assert isinstance(reader, pyarrow.RecordBatchReader)
        # draining the reader must produce a regular pyarrow Table
        assert isinstance(reader.read_all(), pyarrow.Table)
|
|
@@ -139,7 +186,10 @@ def test_open_arrow_batch_size(naturalearth_lowres):
|
|
|
139
186
|
meta, table = read_arrow(naturalearth_lowres)
|
|
140
187
|
batch_size = math.ceil(len(table) / 2)
|
|
141
188
|
|
|
142
|
-
with open_arrow(naturalearth_lowres, batch_size=batch_size) as (
|
|
189
|
+
with open_arrow(naturalearth_lowres, batch_size=batch_size, use_pyarrow=True) as (
|
|
190
|
+
meta,
|
|
191
|
+
reader,
|
|
192
|
+
):
|
|
143
193
|
assert isinstance(meta, dict)
|
|
144
194
|
assert isinstance(reader, pyarrow.RecordBatchReader)
|
|
145
195
|
count = 0
|
|
@@ -185,6 +235,49 @@ def test_open_arrow_max_features_unsupported(naturalearth_lowres, max_features):
|
|
|
185
235
|
pass
|
|
186
236
|
|
|
187
237
|
|
|
238
|
+
@pytest.mark.skipif(
    __gdal_version__ < (3, 8, 0),
    reason="returns geoarrow metadata only for GDAL>=3.8.0",
)
def test_read_arrow_geoarrow_metadata(naturalearth_lowres):
    """The geometry field carries ``geoarrow.wkb`` extension metadata with an EPSG:4326 CRS id."""
    _meta, table = read_arrow(naturalearth_lowres)
    field_metadata = table.schema.field("wkb_geometry").metadata

    assert field_metadata[b"ARROW:extension:name"] == b"geoarrow.wkb"

    # the extension metadata is a JSON document; only the CRS identifier is checked
    crs_id = json.loads(field_metadata[b"ARROW:extension:metadata"])["crs"]["id"]
    assert crs_id["authority"] == "EPSG"
    assert crs_id["code"] == 4326
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def test_open_arrow_capsule_protocol(naturalearth_lowres):
    """The default ``open_arrow`` stream can be consumed by ``pyarrow.table``."""
    pytest.importorskip("pyarrow", minversion="14")

    with open_arrow(naturalearth_lowres) as (meta, stream):
        assert isinstance(meta, dict)
        assert isinstance(stream, pyogrio._io._ArrowStream)

        # materialise the stream while the context manager is still open
        result = pyarrow.table(stream)

    expected = read_arrow(naturalearth_lowres)[1]
    assert result.equals(expected)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres):
    """``open_arrow`` still yields an ``_ArrowStream`` when importing pyarrow fails."""
    pa_module = pytest.importorskip("pyarrow", minversion="14")

    # A None entry in sys.modules makes any subsequent `import pyarrow` fail.
    sys.modules["pyarrow"] = None
    try:
        with open_arrow(naturalearth_lowres) as (meta, stream):
            assert isinstance(meta, dict)
            assert isinstance(stream, pyogrio._io._ArrowStream)
            # use the module object captured above; the import system is blocked
            result = pa_module.table(stream)
    finally:
        # always put the real module back so later tests can import pyarrow
        sys.modules["pyarrow"] = pa_module

    expected = read_arrow(naturalearth_lowres)[1]
    assert result.equals(expected)
|
|
279
|
+
|
|
280
|
+
|
|
188
281
|
@contextlib.contextmanager
|
|
189
282
|
def use_arrow_context():
|
|
190
283
|
original = os.environ.get("PYOGRIO_USE_ARROW", None)
|
|
@@ -205,3 +298,744 @@ def test_enable_with_environment_variable(test_ogr_types_list):
|
|
|
205
298
|
with use_arrow_context():
|
|
206
299
|
result = read_dataframe(test_ogr_types_list)
|
|
207
300
|
assert "list_int64" in result.columns
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
@pytest.mark.skipif(
    __gdal_version__ < (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
)
@pytest.mark.parametrize("ext", ALL_EXTS)
def test_arrow_bool_roundtrip(tmp_path, ext):
    """Boolean values written with ``write`` are read back unchanged via ``read_arrow``."""
    filename = tmp_path / f"test{ext}"

    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 5, dtype=object
    )
    bool_col = np.array([True, False, True, False, True])
    field_data = [bool_col]
    fields = ["bool_col"]

    kwargs = {}

    if ext == ".fgb":
        # For .fgb, spatial_index=False to avoid the rows being reordered
        kwargs["spatial_index"] = False

    # Write exactly once. A second write without **kwargs used to follow here;
    # it overwrote the file and silently discarded the spatial_index option.
    write(
        filename,
        geometry,
        field_data,
        fields,
        geometry_type="Point",
        crs="EPSG:4326",
        **kwargs,
    )

    table = read_arrow(filename)[1]

    assert np.array_equal(table["bool_col"].to_numpy(), bool_col)
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
@pytest.mark.skipif(
    __gdal_version__ >= (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
)
@pytest.mark.parametrize("ext", ALL_EXTS)
def test_arrow_bool_exception(tmp_path, ext):
    """On affected GDAL versions, opening bool columns via Arrow raises for GPKG / FGB only."""
    filename = tmp_path / f"test{ext}"

    # Point(0, 0), repeated once per row
    point_bytes = bytes.fromhex("010100000000000000000000000000000000000000")
    geometry = np.array([point_bytes] * 5, dtype=object)
    bool_col = np.array([True, False, True, False, True])

    write(
        filename,
        geometry,
        [bool_col],
        ["bool_col"],
        geometry_type="Point",
        crs="EPSG:4326",
    )

    if ext not in {".fgb", ".gpkg"}:
        # every other extension opens without an exception
        with open_arrow(filename):
            pass
        return

    # only raise exception for GPKG / FGB
    with pytest.raises(
        RuntimeError,
        match="GDAL < 3.8.3 does not correctly read boolean data values using "
        "the Arrow API",
    ):
        with open_arrow(filename):
            pass

    # do not raise exception if no bool columns are read
    with open_arrow(filename, columns=[]):
        pass
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
# Point(0, 0), repeated three times as raw bytes. Module-level so it can be
# shared as the "geometry" column by test_write_batch_error_message and
# test_write_schema_error_message, which build their tables by hand.
points = np.array(
    [bytes.fromhex("010100000000000000000000000000000000000000")] * 3,
    dtype=object,
)
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
@requires_arrow_write_api
def test_write_shp(tmp_path, naturalearth_lowres):
    """Writing an Arrow table to ``.shp`` also produces the ``.dbf`` and ``.prj`` files."""
    meta, table = read_arrow(naturalearth_lowres)
    shp_path = tmp_path / "test.shp"

    geometry_name = meta["geometry_name"] or "wkb_geometry"
    write_arrow(
        table,
        shp_path,
        crs=meta["crs"],
        encoding=meta["encoding"],
        geometry_type=meta["geometry_type"],
        geometry_name=geometry_name,
    )

    assert shp_path.exists()
    sidecar_suffixes = (".dbf", ".prj")
    assert all(shp_path.with_suffix(suffix).exists() for suffix in sidecar_suffixes)
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
@pytest.mark.filterwarnings("ignore:A geometry of type POLYGON is inserted")
@requires_arrow_write_api
def test_write_gpkg(tmp_path, naturalearth_lowres):
    """An Arrow table can be written to a GeoPackage declared as MultiPolygon."""
    meta, table = read_arrow(naturalearth_lowres)
    gpkg_path = tmp_path / "test.gpkg"

    write_options = {
        "driver": "GPKG",
        "crs": meta["crs"],
        "geometry_type": "MultiPolygon",
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }
    write_arrow(table, gpkg_path, **write_options)

    assert gpkg_path.exists()
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
@pytest.mark.filterwarnings("ignore:A geometry of type POLYGON is inserted")
@requires_arrow_write_api
def test_write_gpkg_multiple_layers(tmp_path, naturalearth_lowres):
    """Two successive writes with different layer names yield two GeoPackage layers."""
    meta, table = read_arrow(naturalearth_lowres)
    meta["geometry_type"] = "MultiPolygon"

    filename = tmp_path / "test.gpkg"

    # options shared by both writes; only the layer name differs
    shared = {
        "driver": "GPKG",
        "crs": meta["crs"],
        "geometry_type": "MultiPolygon",
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }

    write_arrow(table, filename, layer="first", **shared)

    assert filename.exists()
    assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])

    write_arrow(table, filename, layer="second", **shared)

    expected_layers = [["first", "MultiPolygon"], ["second", "MultiPolygon"]]
    assert np.array_equal(list_layers(filename), expected_layers)
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
@requires_arrow_write_api
def test_write_geojson(tmp_path, naturalearth_lowres):
    """Write an Arrow table as GeoJSON and check the structure of the resulting document."""
    meta, table = read_arrow(naturalearth_lowres)
    filename = tmp_path / "test.json"
    write_arrow(
        table,
        filename,
        driver="GeoJSON",
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )

    assert filename.exists()

    # Use a context manager so the handle is closed deterministically (an open
    # handle can block tmp_path cleanup on Windows), and read as UTF-8 rather
    # than the platform's locale encoding.
    with open(filename, encoding="utf-8") as f:
        data = json.load(f)

    assert data["type"] == "FeatureCollection"
    assert data["name"] == "test"
    assert "crs" in data
    assert len(data["features"]) == len(table)
    # every field reported on read must be present as a property of the first feature
    assert not len(
        set(meta["fields"]).difference(data["features"][0]["properties"].keys())
    )
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
@pytest.mark.parametrize(
    "driver",
    # The set removes duplicate driver names; sorting makes the collection
    # order deterministic across processes (set order of strings varies with
    # hash randomization, which breaks distributed collection under xdist).
    sorted(
        {
            driver
            for driver in DRIVERS.values()
            if driver not in ("ESRI Shapefile", "GPKG", "GeoJSON")
        }
    ),
)
@requires_arrow_write_api
def test_write_supported(tmp_path, naturalearth_lowres, driver):
    """Test drivers known to work that are not specifically tested above"""
    meta, table = read_arrow(naturalearth_lowres, columns=["iso_a3"], max_features=1)

    # note: naturalearth_lowres contains mixed polygons / multipolygons, which
    # are not supported in mixed form for all drivers. To get around this here
    # we take the first record only.
    meta["geometry_type"] = "MultiPolygon"

    filename = tmp_path / f"test{DRIVER_EXT[driver]}"
    write_arrow(
        table,
        filename,
        driver=driver,
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )
    assert filename.exists()
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
@requires_arrow_write_api
def test_write_unsupported(tmp_path, naturalearth_lowres):
    """Writing with the ESRIJSON driver is rejected with a DataSourceError."""
    meta, table = read_arrow(naturalearth_lowres)
    target = tmp_path / "test.json"
    geometry_name = meta["geometry_name"] or "wkb_geometry"

    with pytest.raises(DataSourceError, match="does not support write functionality"):
        write_arrow(
            table,
            target,
            driver="ESRIJSON",
            crs=meta["crs"],
            geometry_type=meta["geometry_type"],
            geometry_name=geometry_name,
        )
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
@pytest.mark.parametrize("ext", DRIVERS)
@requires_arrow_write_api
def test_write_append(request, tmp_path, naturalearth_lowres, ext):
    """Appending the same table to an existing file doubles its feature count."""
    if ext.startswith(".geojson"):
        # Bug in GDAL when appending int64 to GeoJSON
        # (https://github.com/OSGeo/gdal/issues/9792)
        xfail_marker = pytest.mark.xfail(
            reason="Bugs with append when writing Arrow to GeoJSON"
        )
        request.node.add_marker(xfail_marker)

    meta, table = read_arrow(naturalearth_lowres)

    # coerce output layer to generic Geometry to avoid mixed type errors
    meta["geometry_type"] = "Unknown"

    filename = tmp_path / f"test{ext}"

    # options common to the initial write and the append
    layer_options = {
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }

    write_arrow(table, filename, **layer_options)
    assert filename.exists()
    assert read_info(filename)["features"] == 177

    # write the same records again
    write_arrow(table, filename, append=True, **layer_options)
    assert read_info(filename)["features"] == 354
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
@pytest.mark.parametrize("driver,ext", [("GML", ".gml"), ("GeoJSONSeq", ".geojsons")])
@requires_arrow_write_api
def test_write_append_unsupported(tmp_path, naturalearth_lowres, driver, ext):
    """Appending to a GML file raises DataSourceError.

    NOTE(review): the ``driver`` and ``ext`` parameters are not used by the
    body; both parametrized cases exercise GML only. Confirm whether the
    GeoJSONSeq case is expected to fail before wiring the parameters in.
    """
    meta, table = read_arrow(naturalearth_lowres)

    # GML does not support append functionality
    filename = tmp_path / "test.gml"
    gml_options = {
        "driver": "GML",
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }

    write_arrow(table, filename, **gml_options)
    assert filename.exists()
    assert read_info(filename, force_feature_count=True)["features"] == 177

    with pytest.raises(DataSourceError):
        write_arrow(table, filename, append=True, **gml_options)
|
|
597
|
+
|
|
598
|
+
|
|
599
|
+
@requires_arrow_write_api
def test_write_gdalclose_error(naturalearth_lowres):
    """A failed write to a non-existing S3 bucket surfaces as DataSourceError."""
    meta, table = read_arrow(naturalearth_lowres)

    filename = "s3://non-existing-bucket/test.geojson"

    # set config options to avoid errors on open due to GDAL S3 configuration
    s3_options = {
        "AWS_ACCESS_KEY_ID": "invalid",
        "AWS_SECRET_ACCESS_KEY": "invalid",
        "AWS_NO_SIGN_REQUEST": True,
    }

    # GDAL config options are process-wide: remember the current values so the
    # bogus credentials do not leak into tests that run after this one.
    previous_options = {name: get_gdal_config_option(name) for name in s3_options}
    set_gdal_config_options(s3_options)

    try:
        with pytest.raises(
            DataSourceError, match="Failed to write features to dataset"
        ):
            write_arrow(
                table,
                filename,
                crs=meta["crs"],
                geometry_type=meta["geometry_type"],
                geometry_name=meta["geometry_name"] or "wkb_geometry",
            )
    finally:
        # a value of None clears an option that was previously unset
        set_gdal_config_options(previous_options)
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
@requires_arrow_write_api
@pytest.mark.parametrize("name", ["geoarrow.wkb", "ogc.wkb"])
def test_write_geometry_extension_type(tmp_path, naturalearth_lowres, name):
    """The geometry column is found from its extension name when ``geometry_name`` is omitted."""
    meta, table = read_arrow(naturalearth_lowres)

    # change extension type name
    schema = table.schema
    geom_idx = schema.get_field_index("wkb_geometry")
    renamed_field = schema.field(geom_idx).with_metadata(
        {"ARROW:extension:name": name}
    )
    new_table = table.cast(schema.set(geom_idx, renamed_field))

    filename = tmp_path / "test_geoarrow.shp"
    write_arrow(
        new_table,
        filename,
        crs=meta["crs"],
        geometry_type=meta["geometry_type"],
    )

    table_roundtripped = read_arrow(filename)[1]
    assert table_roundtripped.equals(table)
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
@requires_arrow_write_api
def test_write_unsupported_geoarrow(tmp_path, naturalearth_lowres):
    """A geometry column tagged ``geoarrow.point`` is rejected with NotImplementedError."""
    meta, table = read_arrow(naturalearth_lowres)

    # change extension type name (the name doesn't match with the column type
    # for correct geoarrow data, but our writing code checks it based on the name)
    schema = table.schema
    geom_idx = schema.get_field_index("wkb_geometry")
    point_field = schema.field(geom_idx).with_metadata(
        {"ARROW:extension:name": "geoarrow.point"}
    )
    new_table = table.cast(schema.set(geom_idx, point_field))

    target = tmp_path / "test_geoarrow.shp"
    with pytest.raises(
        NotImplementedError,
        match="Writing a geometry column of type geoarrow.point is not yet supported",
    ):
        write_arrow(
            new_table,
            target,
            crs=meta["crs"],
            geometry_type=meta["geometry_type"],
        )
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
@requires_arrow_write_api
def test_write_no_geom(tmp_path, naturalearth_lowres):
    """A table without a geometry column round-trips with no CRS and no geometry type."""
    attributes_only = read_arrow(naturalearth_lowres)[1].drop_columns("wkb_geometry")

    # Test
    filename = tmp_path / "test.gpkg"
    write_arrow(attributes_only, filename)

    # Check result
    assert filename.exists()
    meta, result = read_arrow(filename)
    assert meta["crs"] is None
    assert meta["geometry_type"] is None
    assert attributes_only.equals(result)
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
@requires_arrow_write_api
def test_write_geometry_type(tmp_path, naturalearth_lowres):
    """``geometry_type`` is mandatory; passing "Unknown" creates a generic layer."""
    meta, table = read_arrow(naturalearth_lowres)
    geometry_name = meta["geometry_name"] or "wkb_geometry"

    # Not specifying the geometry currently raises an error
    with pytest.raises(ValueError, match="'geometry_type' keyword is required"):
        write_arrow(
            table,
            tmp_path / "test.shp",
            crs=meta["crs"],
            geometry_name=geometry_name,
        )

    # Specifying "Unknown" works and will create generic layer
    filename = tmp_path / "test.gpkg"
    write_arrow(
        table,
        filename,
        crs=meta["crs"],
        geometry_type="Unknown",
        geometry_name=geometry_name,
    )
    assert filename.exists()

    meta_written = read_arrow(filename)[0]
    assert meta_written["geometry_type"] == "Unknown"
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
@requires_arrow_write_api
def test_write_raise_promote_to_multi(tmp_path, naturalearth_lowres):
    """Passing ``promote_to_multi`` to ``write_arrow`` raises ValueError."""
    meta, table = read_arrow(naturalearth_lowres)
    target = tmp_path / "test.shp"
    expected_message = "The 'promote_to_multi' option is not supported"

    with pytest.raises(ValueError, match=expected_message):
        write_arrow(
            table,
            target,
            crs=meta["crs"],
            geometry_type=meta["geometry_type"],
            geometry_name=meta["geometry_name"] or "wkb_geometry",
            promote_to_multi=True,
        )
|
|
729
|
+
|
|
730
|
+
|
|
731
|
+
@requires_arrow_write_api
def test_write_no_crs(tmp_path, naturalearth_lowres):
    """Omitting ``crs`` warns, but the data is still written and reads back without a CRS."""
    meta, table = read_arrow(naturalearth_lowres)
    geometry_name = meta["geometry_name"] or "wkb_geometry"

    filename = tmp_path / "test.shp"
    with pytest.warns(UserWarning, match="'crs' was not provided"):
        write_arrow(
            table,
            filename,
            geometry_type=meta["geometry_type"],
            geometry_name=geometry_name,
        )

    # apart from CRS warning, it did write correctly
    meta_result, result = read_arrow(filename)
    assert table.equals(result)
    assert meta_result["crs"] is None
|
|
747
|
+
|
|
748
|
+
|
|
749
|
+
@requires_arrow_write_api
def test_write_non_arrow_data(tmp_path):
    """A plain numpy array is rejected as not being Arrow data."""
    not_arrow = np.array([1, 2, 3])
    target = tmp_path / "test_no_arrow_data.shp"
    expected_message = "The provided data is not recognized as Arrow data"

    with pytest.raises(ValueError, match=expected_message):
        write_arrow(
            not_arrow,
            target,
            crs="EPSG:4326",
            geometry_type="Point",
            geometry_name="geometry",
        )
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
@pytest.mark.skipif(
    Version(pa.__version__) < Version("16.0.0.dev0"),
    reason="PyCapsule protocol only added to pyarrow.ChunkedArray in pyarrow 16",
)
@requires_arrow_write_api
def test_write_non_arrow_tabular_data(tmp_path):
    """A ChunkedArray is Arrow data but not tabular, so writing it raises DataLayerError."""
    chunked = pa.chunked_array([[1, 2, 3], [4, 5, 6]])
    target = tmp_path / "test_no_arrow_tabular_data.shp"

    with pytest.raises(
        DataLayerError,
        match=".*should be called on a schema that is a struct of fields",
    ):
        write_arrow(
            chunked,
            target,
            crs="EPSG:4326",
            geometry_type="Point",
            geometry_name="geometry",
        )
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
@pytest.mark.filterwarnings("ignore:.*not handled natively:RuntimeWarning")
@requires_arrow_write_api
def test_write_batch_error_message(tmp_path):
    """An error raised by GDAL while writing a batch is surfaced as DataLayerError."""
    # raise the correct error and message from GDAL when an error happens
    # while writing

    # invalid dictionary array that will only error while writing (schema
    # itself is OK): indices 0..2 are used against a two-entry dictionary
    dictionary_type = pa.dictionary(pa.int64(), pa.string())
    index_buffers = pa.array([0, 1, 2]).buffers()
    dictionary_values = pa.array(["a", "b"])
    arr = pa.DictionaryArray.from_buffers(
        dictionary_type,
        length=3,
        buffers=index_buffers,
        dictionary=dictionary_values,
    )
    table = pa.table({"geometry": points, "col": arr})

    target = tmp_path / "test_unsupported_list_type.fgb"
    with pytest.raises(DataLayerError, match=".*invalid dictionary index"):
        write_arrow(
            table,
            target,
            crs="EPSG:4326",
            geometry_type="Point",
            geometry_name="geometry",
        )
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
@requires_arrow_write_api
def test_write_schema_error_message(tmp_path):
    """An unsupported column type fails with FieldError while fields are created."""
    # raise the correct error and message from GDAL when an error happens
    # creating the fields from the schema
    # (using complex list of map of integer->integer which is not supported by GDAL)
    list_of_maps_type = pa.list_(pa.map_(pa.int64(), pa.int64()))
    one_row = [[(1, 2), (3, 4)], None, [(5, 6)]]
    unsupported_column = pa.array([one_row] * 3, list_of_maps_type)
    table = pa.table({"geometry": points, "col": unsupported_column})

    target = tmp_path / "test_unsupported_map_type.shp"
    with pytest.raises(FieldError, match=".*not supported"):
        write_arrow(
            table,
            target,
            crs="EPSG:4326",
            geometry_type="Point",
            geometry_name="geometry",
        )
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
@requires_arrow_write_api
@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory(naturalearth_lowres, driver):
    """Write into a BytesIO buffer and read the layer back from that same buffer."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)
    meta["geometry_type"] = "MultiPolygon"

    write_options = {
        "driver": driver,
        "layer": "test",
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }

    buffer = BytesIO()
    write_arrow(table, buffer, **write_options)

    # the buffer must have received data and expose the requested layer name
    assert len(buffer.getbuffer()) > 0
    first_layer_name = list_layers(buffer)[0][0]
    assert first_layer_name == "test"

    actual_meta, actual_table = read_arrow(buffer)
    assert len(actual_table) == len(table)
    assert np.array_equal(actual_meta["fields"], meta["fields"])
|
|
859
|
+
|
|
860
|
+
|
|
861
|
+
@requires_arrow_write_api
def test_write_memory_driver_required(naturalearth_lowres):
    """Writing to a ``BytesIO`` object with ``driver=None`` raises ValueError."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)

    buffer = BytesIO()
    write_kwargs = {
        "driver": None,
        "layer": "test",
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }
    expected_msg = "driver must be provided to write to in-memory file"

    with pytest.raises(ValueError, match=expected_msg):
        write_arrow(table, buffer, **write_kwargs)
|
|
879
|
+
|
|
880
|
+
|
|
881
|
+
@requires_arrow_write_api
@pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
    """Writing to a ``BytesIO`` object with an unsupported driver raises ValueError.

    Runs once per parametrized driver. The OpenFileGDB case is skipped when
    the GDAL version is older than 3.6.0.
    """
    # Local import: the file's top-level import block is not visible from here.
    import re

    if driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
        pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")

    meta, table = read_arrow(naturalearth_lowres, max_features=1)

    buffer = BytesIO()

    # ``match`` is treated as a regular expression, so the driver name is
    # escaped to be matched literally regardless of the characters it contains.
    expected_msg = (
        f"writing to in-memory file is not supported for {re.escape(driver)}"
    )

    with pytest.raises(ValueError, match=expected_msg):
        write_arrow(
            table,
            buffer,
            driver=driver,
            layer="test",
            crs=meta["crs"],
            geometry_type=meta["geometry_type"],
            geometry_name=meta["geometry_name"] or "wkb_geometry",
        )
|
|
903
|
+
|
|
904
|
+
|
|
905
|
+
@requires_arrow_write_api
@pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
def test_write_memory_append_unsupported(naturalearth_lowres, driver):
    """``append=True`` with a ``BytesIO`` target raises NotImplementedError."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)
    meta["geometry_type"] = "MultiPolygon"

    buffer = BytesIO()
    write_kwargs = {
        "driver": driver,
        "layer": "test",
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
        "append": True,
    }
    expected_msg = "append is not supported for in-memory files"

    with pytest.raises(NotImplementedError, match=expected_msg):
        write_arrow(table, buffer, **write_kwargs)
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
@requires_arrow_write_api
def test_write_memory_existing_unsupported(naturalearth_lowres):
    """Writing to a ``BytesIO`` that already holds bytes raises NotImplementedError."""
    meta, table = read_arrow(naturalearth_lowres, max_features=1)
    meta["geometry_type"] = "MultiPolygon"

    # The buffer is created with content already in it.
    buffer = BytesIO(b"0000")
    write_kwargs = {
        "driver": "GeoJSON",
        "layer": "test",
        "crs": meta["crs"],
        "geometry_type": meta["geometry_type"],
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
    }
    expected_msg = "writing to existing in-memory object is not supported"

    with pytest.raises(NotImplementedError, match=expected_msg):
        write_arrow(table, buffer, **write_kwargs)
|
|
946
|
+
|
|
947
|
+
|
|
948
|
+
@requires_arrow_write_api
def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text):
    """Round-trip non-UTF-8 text through a shapefile, with and without a .cpg.

    The ``encoded_text`` fixture supplies an ``(encoding, text)`` pair; the
    text is used both as the field name and as the field value.
    """
    encoding, text = encoded_text

    # WKB for Point(0, 0)
    point_wkb = bytes.fromhex("010100000000000000000000000000000000000000")
    table = pa.table(
        {
            "geometry": pa.array([point_wkb]),
            text: pa.array([text]),
        }
    )

    filename = tmp_path / "test.shp"
    write_kwargs = {
        "geometry_type": "Point",
        "geometry_name": "geometry",
        "crs": "EPSG:4326",
        "encoding": encoding,
    }
    write_arrow(table, filename, **write_kwargs)

    # NOTE: GDAL automatically creates a cpg file with the encoding name, so
    # reading without an explicit encoding still picks the correct one.
    meta, result = read_arrow(filename)
    assert meta["fields"][0] == text
    assert result[text][0].as_py() == text

    # Without the cpg file, the user-provided encoding has to be used.
    cpg_path = filename.with_suffix(".cpg")
    cpg_path.unlink()

    # ISO-8859-1 will be assumed, which is wrong.
    miscoded = text.encode(encoding).decode("ISO-8859-1")
    wrong_meta = read_arrow(filename)[0]
    assert wrong_meta["fields"][0] == miscoded
    # The table itself cannot be decoded to UTF-8 without UnicodeDecodeErrors.

    # Passing the encoding explicitly should yield the correct text again.
    meta, result = read_arrow(filename, encoding=encoding)
    assert meta["fields"][0] == text
    assert result[text][0].as_py() == text

    # Setting encoding must not corrupt a globally set SHAPE_ENCODING option
    # (it is ignored during read when the user specifies an encoding).
    try:
        set_gdal_config_options({"SHAPE_ENCODING": "CP1254"})
        read_arrow(filename, encoding=encoding)
        assert get_gdal_config_option("SHAPE_ENCODING") == "CP1254"

    finally:
        # Reset so the option does not carry over into other tests.
        set_gdal_config_options({"SHAPE_ENCODING": None})
|
|
1003
|
+
|
|
1004
|
+
|
|
1005
|
+
@requires_arrow_write_api
def test_encoding_write_layer_option_collision_shapefile(tmp_path, naturalearth_lowres):
    """Reject ``encoding`` combined with an ENCODING layer creation option.

    The combination is not allowed even when the layer option is blank.
    """
    meta, table = read_arrow(naturalearth_lowres)

    out_path = tmp_path / "test.shp"
    write_kwargs = {
        "crs": meta["crs"],
        "geometry_type": "MultiPolygon",
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
        "encoding": "CP936",
        "layer_options": {"ENCODING": ""},
    }
    expected_msg = (
        "cannot provide both encoding parameter and "
        '"ENCODING" layer creation option'
    )

    with pytest.raises(ValueError, match=expected_msg):
        write_arrow(table, out_path, **write_kwargs)
|
|
1024
|
+
|
|
1025
|
+
|
|
1026
|
+
@requires_arrow_write_api
@pytest.mark.parametrize("ext", ["gpkg", "geojson"])
def test_non_utf8_encoding_io_arrow_exception(tmp_path, naturalearth_lowres, ext):
    """Passing ``encoding="CP936"`` for these file extensions raises ValueError."""
    meta, table = read_arrow(naturalearth_lowres)

    out_path = tmp_path / f"test.{ext}"
    write_kwargs = {
        "crs": meta["crs"],
        "geometry_type": "MultiPolygon",
        "geometry_name": meta["geometry_name"] or "wkb_geometry",
        "encoding": "CP936",
    }
    expected_msg = "non-UTF-8 encoding is not supported for Arrow"

    with pytest.raises(ValueError, match=expected_msg):
        write_arrow(table, out_path, **write_kwargs)
|