pyogrio 0.7.2-cp38-cp38-manylinux_2_28_aarch64.whl → 0.9.0-cp38-cp38-manylinux_2_28_aarch64.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: the registry has flagged this version of pyogrio; see the registry's page for this release for details.
Files changed (48)
  1. pyogrio/__init__.py +4 -0
  2. pyogrio/_compat.py +6 -1
  3. pyogrio/_err.cpython-38-aarch64-linux-gnu.so +0 -0
  4. pyogrio/_err.pyx +7 -3
  5. pyogrio/_geometry.cpython-38-aarch64-linux-gnu.so +0 -0
  6. pyogrio/_io.cpython-38-aarch64-linux-gnu.so +0 -0
  7. pyogrio/_io.pyx +904 -242
  8. pyogrio/_ogr.cpython-38-aarch64-linux-gnu.so +0 -0
  9. pyogrio/_ogr.pxd +69 -13
  10. pyogrio/_ogr.pyx +8 -24
  11. pyogrio/_version.py +3 -3
  12. pyogrio/_vsi.cpython-38-aarch64-linux-gnu.so +0 -0
  13. pyogrio/_vsi.pxd +4 -0
  14. pyogrio/_vsi.pyx +140 -0
  15. pyogrio/core.py +43 -44
  16. pyogrio/gdal_data/GDAL-targets-release.cmake +3 -3
  17. pyogrio/gdal_data/GDAL-targets.cmake +10 -6
  18. pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
  19. pyogrio/gdal_data/gdalinfo_output.schema.json +2 -0
  20. pyogrio/gdal_data/gdalvrt.xsd +163 -0
  21. pyogrio/gdal_data/ogrinfo_output.schema.json +12 -1
  22. pyogrio/gdal_data/vcpkg.spdx.json +26 -26
  23. pyogrio/gdal_data/vcpkg_abi_info.txt +27 -26
  24. pyogrio/geopandas.py +140 -34
  25. pyogrio/proj_data/ITRF2008 +2 -2
  26. pyogrio/proj_data/proj-config-version.cmake +2 -2
  27. pyogrio/proj_data/proj-config.cmake +2 -1
  28. pyogrio/proj_data/proj-targets.cmake +13 -13
  29. pyogrio/proj_data/proj.db +0 -0
  30. pyogrio/proj_data/proj4-targets.cmake +13 -13
  31. pyogrio/proj_data/vcpkg.spdx.json +20 -42
  32. pyogrio/proj_data/vcpkg_abi_info.txt +14 -15
  33. pyogrio/raw.py +438 -116
  34. pyogrio/tests/conftest.py +75 -6
  35. pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
  36. pyogrio/tests/test_arrow.py +841 -7
  37. pyogrio/tests/test_core.py +99 -7
  38. pyogrio/tests/test_geopandas_io.py +827 -121
  39. pyogrio/tests/test_path.py +23 -3
  40. pyogrio/tests/test_raw_io.py +276 -50
  41. pyogrio/util.py +39 -19
  42. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/METADATA +2 -2
  43. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/RECORD +210 -207
  44. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/WHEEL +1 -1
  45. pyogrio.libs/{libgdal-cb554135.so.33.3.7.2 → libgdal-6ff0914e.so.34.3.8.5} +0 -0
  46. pyogrio/tests/win32.py +0 -86
  47. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/LICENSE +0 -0
  48. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/top_level.txt +0 -0
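
The test diff below exercises the main additions in 0.9.0: an Arrow-based write path (write_arrow in pyogrio.raw, guarded by the requires_arrow_write_api conftest marker) and support for reading and writing in-memory datasets via BytesIO. As a rough orientation only, here is a minimal round-trip sketch assembled from the calls made in those tests; the input path is a placeholder, and the exact behaviour is defined by the package itself, not by this summary:

    from io import BytesIO

    from pyogrio.raw import read_arrow, write_arrow

    # read_arrow returns (metadata dict, pyarrow.Table); the path is a placeholder
    meta, table = read_arrow("naturalearth_lowres.shp")

    # Write the table to an in-memory GeoPackage. As in the tests below, a driver
    # must be given for in-memory output, geometry_type is required ("Unknown"
    # creates a generic layer), and the geometry column is identified by name.
    buffer = BytesIO()
    write_arrow(
        table,
        buffer,
        driver="GPKG",
        layer="test",
        crs=meta["crs"],
        geometry_type="Unknown",
        geometry_name=meta["geometry_name"] or "wkb_geometry",
    )

    # The in-memory dataset can be read back directly from the buffer.
    meta2, table2 = read_arrow(buffer)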
@@ -1,12 +1,32 @@
  import contextlib
+ from io import BytesIO
+ import json
  import math
  import os
+ from packaging.version import Version
+ import sys

  import pytest
+ import numpy as np

- from pyogrio import __gdal_version__, read_dataframe
- from pyogrio.raw import open_arrow, read_arrow
- from pyogrio.tests.conftest import requires_arrow_api
+ import pyogrio
+ from pyogrio import (
+     __gdal_version__,
+     read_dataframe,
+     read_info,
+     list_layers,
+     get_gdal_config_option,
+     set_gdal_config_options,
+ )
+ from pyogrio.raw import open_arrow, read_arrow, write, write_arrow
+ from pyogrio.errors import DataSourceError, FieldError, DataLayerError
+ from pyogrio.tests.conftest import (
+     ALL_EXTS,
+     DRIVERS,
+     DRIVER_EXT,
+     requires_arrow_write_api,
+     requires_pyarrow_api,
+ )

  try:
      import pandas as pd
@@ -18,8 +38,9 @@ except ImportError:
      pass

  # skip all tests in this file if Arrow API or GeoPandas are unavailable
- pytestmark = requires_arrow_api
+ pytestmark = requires_pyarrow_api
  pytest.importorskip("geopandas")
+ pa = pytest.importorskip("pyarrow")


  def test_read_arrow(naturalearth_lowres_all_ext):
@@ -33,6 +54,12 @@ def test_read_arrow(naturalearth_lowres_all_ext):
      assert_geodataframe_equal(result, expected, check_less_precise=check_less_precise)


+ def test_read_arrow_unspecified_layer_warning(data_dir):
+     """Reading a multi-layer file without specifying a layer gives a warning."""
+     with pytest.warns(UserWarning, match="More than one layer found "):
+         read_arrow(data_dir / "sample.osm.pbf")
+
+
  @pytest.mark.parametrize("skip_features, expected", [(10, 167), (200, 0)])
  def test_read_arrow_skip_features(naturalearth_lowres, skip_features, expected):
      table = read_arrow(naturalearth_lowres, skip_features=skip_features)[1]
@@ -115,6 +142,7 @@ def test_read_arrow_to_pandas_kwargs(test_fgdb_vsi):
      arrow_to_pandas_kwargs = {"strings_to_categorical": True}
      result = read_dataframe(
          test_fgdb_vsi,
+         layer="basetable_2",
          use_arrow=True,
          arrow_to_pandas_kwargs=arrow_to_pandas_kwargs,
      )
@@ -128,8 +156,27 @@ def test_read_arrow_raw(naturalearth_lowres):
      assert isinstance(table, pyarrow.Table)


- def test_open_arrow(naturalearth_lowres):
-     with open_arrow(naturalearth_lowres) as (meta, reader):
+ def test_read_arrow_vsi(naturalearth_lowres_vsi):
+     table = read_arrow(naturalearth_lowres_vsi[1])[1]
+     assert len(table) == 177
+
+
+ def test_read_arrow_bytes(geojson_bytes):
+     meta, table = read_arrow(geojson_bytes)
+
+     assert meta["fields"].shape == (5,)
+     assert len(table) == 3
+
+
+ def test_read_arrow_filelike(geojson_filelike):
+     meta, table = read_arrow(geojson_filelike)
+
+     assert meta["fields"].shape == (5,)
+     assert len(table) == 3
+
+
+ def test_open_arrow_pyarrow(naturalearth_lowres):
+     with open_arrow(naturalearth_lowres, use_pyarrow=True) as (meta, reader):
          assert isinstance(meta, dict)
          assert isinstance(reader, pyarrow.RecordBatchReader)
          assert isinstance(reader.read_all(), pyarrow.Table)
@@ -139,7 +186,10 @@ def test_open_arrow_batch_size(naturalearth_lowres):
      meta, table = read_arrow(naturalearth_lowres)
      batch_size = math.ceil(len(table) / 2)

-     with open_arrow(naturalearth_lowres, batch_size=batch_size) as (meta, reader):
+     with open_arrow(naturalearth_lowres, batch_size=batch_size, use_pyarrow=True) as (
+         meta,
+         reader,
+     ):
          assert isinstance(meta, dict)
          assert isinstance(reader, pyarrow.RecordBatchReader)
          count = 0
@@ -185,6 +235,49 @@ def test_open_arrow_max_features_unsupported(naturalearth_lowres, max_features):
              pass


+ @pytest.mark.skipif(
+     __gdal_version__ < (3, 8, 0),
+     reason="returns geoarrow metadata only for GDAL>=3.8.0",
+ )
+ def test_read_arrow_geoarrow_metadata(naturalearth_lowres):
+     _meta, table = read_arrow(naturalearth_lowres)
+     field = table.schema.field("wkb_geometry")
+     assert field.metadata[b"ARROW:extension:name"] == b"geoarrow.wkb"
+     parsed_meta = json.loads(field.metadata[b"ARROW:extension:metadata"])
+     assert parsed_meta["crs"]["id"]["authority"] == "EPSG"
+     assert parsed_meta["crs"]["id"]["code"] == 4326
+
+
+ def test_open_arrow_capsule_protocol(naturalearth_lowres):
+     pytest.importorskip("pyarrow", minversion="14")
+
+     with open_arrow(naturalearth_lowres) as (meta, reader):
+         assert isinstance(meta, dict)
+         assert isinstance(reader, pyogrio._io._ArrowStream)
+
+         result = pyarrow.table(reader)
+
+     _, expected = read_arrow(naturalearth_lowres)
+     assert result.equals(expected)
+
+
+ def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres):
+     pyarrow = pytest.importorskip("pyarrow", minversion="14")
+
+     # Make PyArrow temporarily unavailable (importing will fail)
+     sys.modules["pyarrow"] = None
+     try:
+         with open_arrow(naturalearth_lowres) as (meta, reader):
+             assert isinstance(meta, dict)
+             assert isinstance(reader, pyogrio._io._ArrowStream)
+             result = pyarrow.table(reader)
+     finally:
+         sys.modules["pyarrow"] = pyarrow
+
+     _, expected = read_arrow(naturalearth_lowres)
+     assert result.equals(expected)
+
+
  @contextlib.contextmanager
  def use_arrow_context():
      original = os.environ.get("PYOGRIO_USE_ARROW", None)
@@ -205,3 +298,744 @@ def test_enable_with_environment_variable(test_ogr_types_list):
      with use_arrow_context():
          result = read_dataframe(test_ogr_types_list)
          assert "list_int64" in result.columns
+
+
+ @pytest.mark.skipif(
+     __gdal_version__ < (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
+ )
+ @pytest.mark.parametrize("ext", ALL_EXTS)
+ def test_arrow_bool_roundtrip(tmp_path, ext):
+     filename = tmp_path / f"test{ext}"
+
+     # Point(0, 0)
+     geometry = np.array(
+         [bytes.fromhex("010100000000000000000000000000000000000000")] * 5, dtype=object
+     )
+     bool_col = np.array([True, False, True, False, True])
+     field_data = [bool_col]
+     fields = ["bool_col"]
+
+     kwargs = {}
+
+     if ext == ".fgb":
+         # For .fgb, spatial_index=False to avoid the rows being reordered
+         kwargs["spatial_index"] = False
+
+     write(
+         filename,
+         geometry,
+         field_data,
+         fields,
+         geometry_type="Point",
+         crs="EPSG:4326",
+         **kwargs,
+     )
+
+     write(
+         filename, geometry, field_data, fields, geometry_type="Point", crs="EPSG:4326"
+     )
+     table = read_arrow(filename)[1]
+
+     assert np.array_equal(table["bool_col"].to_numpy(), bool_col)
+
+
+ @pytest.mark.skipif(
+     __gdal_version__ >= (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3"
+ )
+ @pytest.mark.parametrize("ext", ALL_EXTS)
+ def test_arrow_bool_exception(tmp_path, ext):
+     filename = tmp_path / f"test{ext}"
+
+     # Point(0, 0)
+     geometry = np.array(
+         [bytes.fromhex("010100000000000000000000000000000000000000")] * 5, dtype=object
+     )
+     bool_col = np.array([True, False, True, False, True])
+     field_data = [bool_col]
+     fields = ["bool_col"]
+
+     write(
+         filename, geometry, field_data, fields, geometry_type="Point", crs="EPSG:4326"
+     )
+
+     if ext in {".fgb", ".gpkg"}:
+         # only raise exception for GPKG / FGB
+         with pytest.raises(
+             RuntimeError,
+             match="GDAL < 3.8.3 does not correctly read boolean data values using "
+             "the Arrow API",
+         ):
+             with open_arrow(filename):
+                 pass
+
+         # do not raise exception if no bool columns are read
+         with open_arrow(filename, columns=[]):
+             pass
+
+     else:
+         with open_arrow(filename):
+             pass
+
+
+ # Point(0, 0)
+ points = np.array(
+     [bytes.fromhex("010100000000000000000000000000000000000000")] * 3,
+     dtype=object,
+ )
+
+
+ @requires_arrow_write_api
+ def test_write_shp(tmp_path, naturalearth_lowres):
+     meta, table = read_arrow(naturalearth_lowres)
+
+     filename = tmp_path / "test.shp"
+     write_arrow(
+         table,
+         filename,
+         crs=meta["crs"],
+         encoding=meta["encoding"],
+         geometry_type=meta["geometry_type"],
+         geometry_name=meta["geometry_name"] or "wkb_geometry",
+     )
+
+     assert filename.exists()
+     for ext in (".dbf", ".prj"):
+         assert filename.with_suffix(ext).exists()
+
+
+ @pytest.mark.filterwarnings("ignore:A geometry of type POLYGON is inserted")
+ @requires_arrow_write_api
+ def test_write_gpkg(tmp_path, naturalearth_lowres):
+     meta, table = read_arrow(naturalearth_lowres)
+
+     filename = tmp_path / "test.gpkg"
+     write_arrow(
+         table,
+         filename,
+         driver="GPKG",
+         crs=meta["crs"],
+         geometry_type="MultiPolygon",
+         geometry_name=meta["geometry_name"] or "wkb_geometry",
+     )
+
+     assert filename.exists()
+
+
+ @pytest.mark.filterwarnings("ignore:A geometry of type POLYGON is inserted")
+ @requires_arrow_write_api
+ def test_write_gpkg_multiple_layers(tmp_path, naturalearth_lowres):
+     meta, table = read_arrow(naturalearth_lowres)
+     meta["geometry_type"] = "MultiPolygon"
+
+     filename = tmp_path / "test.gpkg"
+     write_arrow(
+         table,
+         filename,
+         driver="GPKG",
+         layer="first",
+         crs=meta["crs"],
+         geometry_type="MultiPolygon",
+         geometry_name=meta["geometry_name"] or "wkb_geometry",
+     )
+
+     assert filename.exists()
+
+     assert np.array_equal(list_layers(filename), [["first", "MultiPolygon"]])
+
+     write_arrow(
+         table,
+         filename,
+         driver="GPKG",
+         layer="second",
+         crs=meta["crs"],
+         geometry_type="MultiPolygon",
+         geometry_name=meta["geometry_name"] or "wkb_geometry",
+     )
+
+     assert np.array_equal(
+         list_layers(filename), [["first", "MultiPolygon"], ["second", "MultiPolygon"]]
+     )
+
+
+ @requires_arrow_write_api
+ def test_write_geojson(tmp_path, naturalearth_lowres):
+     meta, table = read_arrow(naturalearth_lowres)
+     filename = tmp_path / "test.json"
+     write_arrow(
+         table,
+         filename,
+         driver="GeoJSON",
+         crs=meta["crs"],
+         geometry_type=meta["geometry_type"],
+         geometry_name=meta["geometry_name"] or "wkb_geometry",
+     )
+
+     assert filename.exists()
+
+     data = json.loads(open(filename).read())
+
+     assert data["type"] == "FeatureCollection"
+     assert data["name"] == "test"
+     assert "crs" in data
+     assert len(data["features"]) == len(table)
+     assert not len(
+         set(meta["fields"]).difference(data["features"][0]["properties"].keys())
+     )
+
+
+ @pytest.mark.parametrize(
+     "driver",
+     {
+         driver
+         for driver in DRIVERS.values()
+         if driver not in ("ESRI Shapefile", "GPKG", "GeoJSON")
+     },
+ )
+ @requires_arrow_write_api
+ def test_write_supported(tmp_path, naturalearth_lowres, driver):
+     """Test drivers known to work that are not specifically tested above"""
+     meta, table = read_arrow(naturalearth_lowres, columns=["iso_a3"], max_features=1)
+
+     # note: naturalearth_lowres contains mixed polygons / multipolygons, which
+     # are not supported in mixed form for all drivers. To get around this here
+     # we take the first record only.
+     meta["geometry_type"] = "MultiPolygon"
+
+     filename = tmp_path / f"test{DRIVER_EXT[driver]}"
+     write_arrow(
+         table,
+         filename,
+         driver=driver,
+         crs=meta["crs"],
+         geometry_type=meta["geometry_type"],
+         geometry_name=meta["geometry_name"] or "wkb_geometry",
+     )
+     assert filename.exists()
+
+
+ @requires_arrow_write_api
+ def test_write_unsupported(tmp_path, naturalearth_lowres):
+     meta, table = read_arrow(naturalearth_lowres)
+
+     with pytest.raises(DataSourceError, match="does not support write functionality"):
+         write_arrow(
+             table,
+             tmp_path / "test.json",
+             driver="ESRIJSON",
+             crs=meta["crs"],
+             geometry_type=meta["geometry_type"],
+             geometry_name=meta["geometry_name"] or "wkb_geometry",
+         )
+
+
+ @pytest.mark.parametrize("ext", DRIVERS)
+ @requires_arrow_write_api
+ def test_write_append(request, tmp_path, naturalearth_lowres, ext):
+     if ext.startswith(".geojson"):
+         # Bug in GDAL when appending int64 to GeoJSON
+         # (https://github.com/OSGeo/gdal/issues/9792)
+         request.node.add_marker(
+             pytest.mark.xfail(reason="Bugs with append when writing Arrow to GeoJSON")
+         )
+
+     meta, table = read_arrow(naturalearth_lowres)
+
+     # coerce output layer to generic Geometry to avoid mixed type errors
+     meta["geometry_type"] = "Unknown"
+
+     filename = tmp_path / f"test{ext}"
+     write_arrow(
+         table,
+         filename,
+         crs=meta["crs"],
+         geometry_type=meta["geometry_type"],
+         geometry_name=meta["geometry_name"] or "wkb_geometry",
+     )
+     assert filename.exists()
+     assert read_info(filename)["features"] == 177
+
+     # write the same records again
+     write_arrow(
+         table,
+         filename,
+         append=True,
+         crs=meta["crs"],
+         geometry_type=meta["geometry_type"],
+         geometry_name=meta["geometry_name"] or "wkb_geometry",
+     )
+     assert read_info(filename)["features"] == 354
+
+
+ @pytest.mark.parametrize("driver,ext", [("GML", ".gml"), ("GeoJSONSeq", ".geojsons")])
+ @requires_arrow_write_api
+ def test_write_append_unsupported(tmp_path, naturalearth_lowres, driver, ext):
+     meta, table = read_arrow(naturalearth_lowres)
+
+     # GML does not support append functionality
+     filename = tmp_path / "test.gml"
+     write_arrow(
+         table,
+         filename,
+         driver="GML",
+         crs=meta["crs"],
+         geometry_type=meta["geometry_type"],
+         geometry_name=meta["geometry_name"] or "wkb_geometry",
+     )
+     assert filename.exists()
+     assert read_info(filename, force_feature_count=True)["features"] == 177
+
+     with pytest.raises(DataSourceError):
+         write_arrow(
+             table,
+             filename,
+             driver="GML",
+             append=True,
+             crs=meta["crs"],
+             geometry_type=meta["geometry_type"],
+             geometry_name=meta["geometry_name"] or "wkb_geometry",
+         )
+
+
+ @requires_arrow_write_api
+ def test_write_gdalclose_error(naturalearth_lowres):
+     meta, table = read_arrow(naturalearth_lowres)
+
+     filename = "s3://non-existing-bucket/test.geojson"
+
+     # set config options to avoid errors on open due to GDAL S3 configuration
+     set_gdal_config_options(
+         {
+             "AWS_ACCESS_KEY_ID": "invalid",
+             "AWS_SECRET_ACCESS_KEY": "invalid",
+             "AWS_NO_SIGN_REQUEST": True,
+         }
+     )
+
+     with pytest.raises(DataSourceError, match="Failed to write features to dataset"):
+         write_arrow(
+             table,
+             filename,
+             crs=meta["crs"],
+             geometry_type=meta["geometry_type"],
+             geometry_name=meta["geometry_name"] or "wkb_geometry",
+         )
+
+
+ @requires_arrow_write_api
+ @pytest.mark.parametrize("name", ["geoarrow.wkb", "ogc.wkb"])
+ def test_write_geometry_extension_type(tmp_path, naturalearth_lowres, name):
+     # Infer geometry column based on extension name
+     # instead of passing `geometry_name` explicitly
+     meta, table = read_arrow(naturalearth_lowres)
+
+     # change extension type name
+     idx = table.schema.get_field_index("wkb_geometry")
+     new_field = table.schema.field(idx).with_metadata({"ARROW:extension:name": name})
+     new_table = table.cast(table.schema.set(idx, new_field))
+
+     filename = tmp_path / "test_geoarrow.shp"
+     write_arrow(
+         new_table,
+         filename,
+         crs=meta["crs"],
+         geometry_type=meta["geometry_type"],
+     )
+     _, table_roundtripped = read_arrow(filename)
+     assert table_roundtripped.equals(table)
+
+
+ @requires_arrow_write_api
+ def test_write_unsupported_geoarrow(tmp_path, naturalearth_lowres):
+     meta, table = read_arrow(naturalearth_lowres)
+
+     # change extension type name (the name doesn't match with the column type
+     # for correct geoarrow data, but our writing code checks it based on the name)
+     idx = table.schema.get_field_index("wkb_geometry")
+     new_field = table.schema.field(idx).with_metadata(
+         {"ARROW:extension:name": "geoarrow.point"}
+     )
+     new_table = table.cast(table.schema.set(idx, new_field))
+
+     with pytest.raises(
+         NotImplementedError,
+         match="Writing a geometry column of type geoarrow.point is not yet supported",
+     ):
+         write_arrow(
+             new_table,
+             tmp_path / "test_geoarrow.shp",
+             crs=meta["crs"],
+             geometry_type=meta["geometry_type"],
+         )
+
+
+ @requires_arrow_write_api
+ def test_write_no_geom(tmp_path, naturalearth_lowres):
+     _, table = read_arrow(naturalearth_lowres)
+     table = table.drop_columns("wkb_geometry")
+
+     # Test
+     filename = tmp_path / "test.gpkg"
+     write_arrow(table, filename)
+     # Check result
+     assert filename.exists()
+     meta, result = read_arrow(filename)
+     assert meta["crs"] is None
+     assert meta["geometry_type"] is None
+     assert table.equals(result)
+
+
+ @requires_arrow_write_api
+ def test_write_geometry_type(tmp_path, naturalearth_lowres):
+     meta, table = read_arrow(naturalearth_lowres)
+
+     # Not specifying the geometry currently raises an error
+     with pytest.raises(ValueError, match="'geometry_type' keyword is required"):
+         write_arrow(
+             table,
+             tmp_path / "test.shp",
+             crs=meta["crs"],
+             geometry_name=meta["geometry_name"] or "wkb_geometry",
+         )
+
+     # Specifying "Unknown" works and will create generic layer
+     filename = tmp_path / "test.gpkg"
+     write_arrow(
+         table,
+         filename,
+         crs=meta["crs"],
+         geometry_type="Unknown",
+         geometry_name=meta["geometry_name"] or "wkb_geometry",
+     )
+     assert filename.exists()
+     meta_written, _ = read_arrow(filename)
+     assert meta_written["geometry_type"] == "Unknown"
+
+
+ @requires_arrow_write_api
+ def test_write_raise_promote_to_multi(tmp_path, naturalearth_lowres):
+     meta, table = read_arrow(naturalearth_lowres)
+
+     with pytest.raises(
+         ValueError, match="The 'promote_to_multi' option is not supported"
+     ):
+         write_arrow(
+             table,
+             tmp_path / "test.shp",
+             crs=meta["crs"],
+             geometry_type=meta["geometry_type"],
+             geometry_name=meta["geometry_name"] or "wkb_geometry",
+             promote_to_multi=True,
+         )
+
+
+ @requires_arrow_write_api
+ def test_write_no_crs(tmp_path, naturalearth_lowres):
+     meta, table = read_arrow(naturalearth_lowres)
+
+     filename = tmp_path / "test.shp"
+     with pytest.warns(UserWarning, match="'crs' was not provided"):
+         write_arrow(
+             table,
+             filename,
+             geometry_type=meta["geometry_type"],
+             geometry_name=meta["geometry_name"] or "wkb_geometry",
+         )
+     # apart from CRS warning, it did write correctly
+     meta_result, result = read_arrow(filename)
+     assert table.equals(result)
+     assert meta_result["crs"] is None
+
+
+ @requires_arrow_write_api
+ def test_write_non_arrow_data(tmp_path):
+     data = np.array([1, 2, 3])
+     with pytest.raises(
+         ValueError, match="The provided data is not recognized as Arrow data"
+     ):
+         write_arrow(
+             data,
+             tmp_path / "test_no_arrow_data.shp",
+             crs="EPSG:4326",
+             geometry_type="Point",
+             geometry_name="geometry",
+         )
+
+
+ @pytest.mark.skipif(
+     Version(pa.__version__) < Version("16.0.0.dev0"),
+     reason="PyCapsule protocol only added to pyarrow.ChunkedArray in pyarrow 16",
+ )
+ @requires_arrow_write_api
+ def test_write_non_arrow_tabular_data(tmp_path):
+     data = pa.chunked_array([[1, 2, 3], [4, 5, 6]])
+     with pytest.raises(
+         DataLayerError,
+         match=".*should be called on a schema that is a struct of fields",
+     ):
+         write_arrow(
+             data,
+             tmp_path / "test_no_arrow_tabular_data.shp",
+             crs="EPSG:4326",
+             geometry_type="Point",
+             geometry_name="geometry",
+         )
+
+
+ @pytest.mark.filterwarnings("ignore:.*not handled natively:RuntimeWarning")
+ @requires_arrow_write_api
+ def test_write_batch_error_message(tmp_path):
+     # raise the correct error and message from GDAL when an error happens
+     # while writing
+
+     # invalid dictionary array that will only error while writing (schema
+     # itself is OK)
+     arr = pa.DictionaryArray.from_buffers(
+         pa.dictionary(pa.int64(), pa.string()),
+         length=3,
+         buffers=pa.array([0, 1, 2]).buffers(),
+         dictionary=pa.array(["a", "b"]),
+     )
+     table = pa.table({"geometry": points, "col": arr})
+
+     with pytest.raises(DataLayerError, match=".*invalid dictionary index"):
+         write_arrow(
+             table,
+             tmp_path / "test_unsupported_list_type.fgb",
+             crs="EPSG:4326",
+             geometry_type="Point",
+             geometry_name="geometry",
+         )
+
+
+ @requires_arrow_write_api
+ def test_write_schema_error_message(tmp_path):
+     # raise the correct error and message from GDAL when an error happens
+     # creating the fields from the schema
+     # (using complex list of map of integer->integer which is not supported by GDAL)
+     table = pa.table(
+         {
+             "geometry": points,
+             "col": pa.array(
+                 [[[(1, 2), (3, 4)], None, [(5, 6)]]] * 3,
+                 pa.list_(pa.map_(pa.int64(), pa.int64())),
+             ),
+         }
+     )
+
+     with pytest.raises(FieldError, match=".*not supported"):
+         write_arrow(
+             table,
+             tmp_path / "test_unsupported_map_type.shp",
+             crs="EPSG:4326",
+             geometry_type="Point",
+             geometry_name="geometry",
+         )
+
+
+ @requires_arrow_write_api
+ @pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning")
+ @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
+ def test_write_memory(naturalearth_lowres, driver):
+     meta, table = read_arrow(naturalearth_lowres, max_features=1)
+     meta["geometry_type"] = "MultiPolygon"
+
+     buffer = BytesIO()
+     write_arrow(
+         table,
+         buffer,
+         driver=driver,
+         layer="test",
+         crs=meta["crs"],
+         geometry_type=meta["geometry_type"],
+         geometry_name=meta["geometry_name"] or "wkb_geometry",
+     )
+
+     assert len(buffer.getbuffer()) > 0
+     assert list_layers(buffer)[0][0] == "test"
+
+     actual_meta, actual_table = read_arrow(buffer)
+     assert len(actual_table) == len(table)
+     assert np.array_equal(actual_meta["fields"], meta["fields"])
+
+
+ @requires_arrow_write_api
+ def test_write_memory_driver_required(naturalearth_lowres):
+     meta, table = read_arrow(naturalearth_lowres, max_features=1)
+
+     buffer = BytesIO()
+     with pytest.raises(
+         ValueError,
+         match="driver must be provided to write to in-memory file",
+     ):
+         write_arrow(
+             table,
+             buffer,
+             driver=None,
+             layer="test",
+             crs=meta["crs"],
+             geometry_type=meta["geometry_type"],
+             geometry_name=meta["geometry_name"] or "wkb_geometry",
+         )
+
+
+ @requires_arrow_write_api
+ @pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"])
+ def test_write_memory_unsupported_driver(naturalearth_lowres, driver):
+     if driver == "OpenFileGDB" and __gdal_version__ < (3, 6, 0):
+         pytest.skip("OpenFileGDB write support only available for GDAL >= 3.6.0")
+
+     meta, table = read_arrow(naturalearth_lowres, max_features=1)
+
+     buffer = BytesIO()
+
+     with pytest.raises(
+         ValueError, match=f"writing to in-memory file is not supported for {driver}"
+     ):
+         write_arrow(
+             table,
+             buffer,
+             driver=driver,
+             layer="test",
+             crs=meta["crs"],
+             geometry_type=meta["geometry_type"],
+             geometry_name=meta["geometry_name"] or "wkb_geometry",
+         )
+
+
+ @requires_arrow_write_api
+ @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"])
+ def test_write_memory_append_unsupported(naturalearth_lowres, driver):
+     meta, table = read_arrow(naturalearth_lowres, max_features=1)
+     meta["geometry_type"] = "MultiPolygon"
+
+     buffer = BytesIO()
+     with pytest.raises(
+         NotImplementedError, match="append is not supported for in-memory files"
+     ):
+         write_arrow(
+             table,
+             buffer,
+             driver=driver,
+             layer="test",
+             crs=meta["crs"],
+             geometry_type=meta["geometry_type"],
+             geometry_name=meta["geometry_name"] or "wkb_geometry",
+             append=True,
+         )
+
+
+ @requires_arrow_write_api
+ def test_write_memory_existing_unsupported(naturalearth_lowres):
+     meta, table = read_arrow(naturalearth_lowres, max_features=1)
+     meta["geometry_type"] = "MultiPolygon"
+
+     buffer = BytesIO(b"0000")
+     with pytest.raises(
+         NotImplementedError,
+         match="writing to existing in-memory object is not supported",
+     ):
+         write_arrow(
+             table,
+             buffer,
+             driver="GeoJSON",
+             layer="test",
+             crs=meta["crs"],
+             geometry_type=meta["geometry_type"],
+             geometry_name=meta["geometry_name"] or "wkb_geometry",
+         )
+
+
+ @requires_arrow_write_api
+ def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text):
+     encoding, text = encoded_text
+
+     table = pa.table(
+         {
+             # Point(0, 0)
+             "geometry": pa.array(
+                 [bytes.fromhex("010100000000000000000000000000000000000000")]
+             ),
+             text: pa.array([text]),
+         }
+     )
+
+     filename = tmp_path / "test.shp"
+     write_arrow(
+         table,
+         filename,
+         geometry_type="Point",
+         geometry_name="geometry",
+         crs="EPSG:4326",
+         encoding=encoding,
+     )
+
+     # NOTE: GDAL automatically creates a cpg file with the encoding name, which
+     # means that if we read this without specifying the encoding it uses the
+     # correct one
+     schema, table = read_arrow(filename)
+     assert schema["fields"][0] == text
+     assert table[text][0].as_py() == text
+
+     # verify that if cpg file is not present, that user-provided encoding must be used
+     filename.with_suffix(".cpg").unlink()
+
+     # We will assume ISO-8859-1, which is wrong
+     miscoded = text.encode(encoding).decode("ISO-8859-1")
+     bad_schema = read_arrow(filename)[0]
+     assert bad_schema["fields"][0] == miscoded
+     # table cannot be decoded to UTF-8 without UnicodeDecodeErrors
+
+     # If encoding is provided, that should yield correct text
+     schema, table = read_arrow(filename, encoding=encoding)
+     assert schema["fields"][0] == text
+     assert table[text][0].as_py() == text
+
+     # verify that setting encoding does not corrupt SHAPE_ENCODING option if set
+     # globally (it is ignored during read when encoding is specified by user)
+     try:
+         set_gdal_config_options({"SHAPE_ENCODING": "CP1254"})
+         _ = read_arrow(filename, encoding=encoding)
+         assert get_gdal_config_option("SHAPE_ENCODING") == "CP1254"
+
+     finally:
+         # reset to clear between tests
+         set_gdal_config_options({"SHAPE_ENCODING": None})
+
+
+ @requires_arrow_write_api
+ def test_encoding_write_layer_option_collision_shapefile(tmp_path, naturalearth_lowres):
+     """Providing both encoding parameter and ENCODING layer creation option (even if blank) is not allowed"""
+
+     meta, table = read_arrow(naturalearth_lowres)
+
+     with pytest.raises(
+         ValueError,
+         match='cannot provide both encoding parameter and "ENCODING" layer creation option',
+     ):
+         write_arrow(
+             table,
+             tmp_path / "test.shp",
+             crs=meta["crs"],
+             geometry_type="MultiPolygon",
+             geometry_name=meta["geometry_name"] or "wkb_geometry",
+             encoding="CP936",
+             layer_options={"ENCODING": ""},
+         )
+
+
+ @requires_arrow_write_api
+ @pytest.mark.parametrize("ext", ["gpkg", "geojson"])
+ def test_non_utf8_encoding_io_arrow_exception(tmp_path, naturalearth_lowres, ext):
+     meta, table = read_arrow(naturalearth_lowres)
+
+     with pytest.raises(
+         ValueError, match="non-UTF-8 encoding is not supported for Arrow"
+     ):
+         write_arrow(
+             table,
+             tmp_path / f"test.{ext}",
+             crs=meta["crs"],
+             geometry_type="MultiPolygon",
+             geometry_name=meta["geometry_name"] or "wkb_geometry",
+             encoding="CP936",
+         )