pyogrio 0.10.0__cp310-cp310-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyogrio might be problematic. Click here for more details.
- pyogrio/__init__.py +55 -0
- pyogrio/_compat.py +47 -0
- pyogrio/_env.py +59 -0
- pyogrio/_err.cpython-310-x86_64-linux-gnu.so +0 -0
- pyogrio/_geometry.cpython-310-x86_64-linux-gnu.so +0 -0
- pyogrio/_io.cpython-310-x86_64-linux-gnu.so +0 -0
- pyogrio/_ogr.cpython-310-x86_64-linux-gnu.so +0 -0
- pyogrio/_version.py +21 -0
- pyogrio/_vsi.cpython-310-x86_64-linux-gnu.so +0 -0
- pyogrio/core.py +386 -0
- pyogrio/errors.py +25 -0
- pyogrio/gdal_data/GDAL-targets-release.cmake +19 -0
- pyogrio/gdal_data/GDAL-targets.cmake +105 -0
- pyogrio/gdal_data/GDALConfig.cmake +24 -0
- pyogrio/gdal_data/GDALConfigVersion.cmake +85 -0
- pyogrio/gdal_data/GDALLogoBW.svg +138 -0
- pyogrio/gdal_data/GDALLogoColor.svg +126 -0
- pyogrio/gdal_data/GDALLogoGS.svg +126 -0
- pyogrio/gdal_data/LICENSE.TXT +467 -0
- pyogrio/gdal_data/MM_m_idofic.csv +321 -0
- pyogrio/gdal_data/copyright +467 -0
- pyogrio/gdal_data/cubewerx_extra.wkt +48 -0
- pyogrio/gdal_data/default.rsc +0 -0
- pyogrio/gdal_data/ecw_cs.wkt +1453 -0
- pyogrio/gdal_data/eedaconf.json +23 -0
- pyogrio/gdal_data/epsg.wkt +1 -0
- pyogrio/gdal_data/esri_StatePlane_extra.wkt +631 -0
- pyogrio/gdal_data/gdalicon.png +0 -0
- pyogrio/gdal_data/gdalinfo_output.schema.json +346 -0
- pyogrio/gdal_data/gdalmdiminfo_output.schema.json +321 -0
- pyogrio/gdal_data/gdaltileindex.xsd +269 -0
- pyogrio/gdal_data/gdalvrt.xsd +880 -0
- pyogrio/gdal_data/gfs.xsd +246 -0
- pyogrio/gdal_data/gml_registry.xml +117 -0
- pyogrio/gdal_data/gml_registry.xsd +66 -0
- pyogrio/gdal_data/grib2_center.csv +251 -0
- pyogrio/gdal_data/grib2_process.csv +102 -0
- pyogrio/gdal_data/grib2_subcenter.csv +63 -0
- pyogrio/gdal_data/grib2_table_4_2_0_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_13.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_14.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_15.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_16.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_17.csv +11 -0
- pyogrio/gdal_data/grib2_table_4_2_0_18.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_19.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_190.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_191.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_20.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_21.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_3.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_4.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_5.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_6.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_7.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_191.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_3.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_4.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_1_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_1_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_1_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_20_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_20_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_20_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_3.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_4.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_5.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_6.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_3_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_3_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_3_2.csv +28 -0
- pyogrio/gdal_data/grib2_table_4_2_3_3.csv +8 -0
- pyogrio/gdal_data/grib2_table_4_2_3_4.csv +14 -0
- pyogrio/gdal_data/grib2_table_4_2_3_5.csv +11 -0
- pyogrio/gdal_data/grib2_table_4_2_3_6.csv +11 -0
- pyogrio/gdal_data/grib2_table_4_2_4_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_10.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_3.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_4.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_5.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_6.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_7.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_8.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_9.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_local_Canada.csv +5 -0
- pyogrio/gdal_data/grib2_table_4_2_local_HPC.csv +2 -0
- pyogrio/gdal_data/grib2_table_4_2_local_MRMS.csv +175 -0
- pyogrio/gdal_data/grib2_table_4_2_local_NCEP.csv +401 -0
- pyogrio/gdal_data/grib2_table_4_2_local_NDFD.csv +38 -0
- pyogrio/gdal_data/grib2_table_4_2_local_index.csv +7 -0
- pyogrio/gdal_data/grib2_table_4_5.csv +261 -0
- pyogrio/gdal_data/grib2_table_versions.csv +3 -0
- pyogrio/gdal_data/gt_datum.csv +229 -0
- pyogrio/gdal_data/gt_ellips.csv +24 -0
- pyogrio/gdal_data/header.dxf +1124 -0
- pyogrio/gdal_data/inspire_cp_BasicPropertyUnit.gfs +57 -0
- pyogrio/gdal_data/inspire_cp_CadastralBoundary.gfs +60 -0
- pyogrio/gdal_data/inspire_cp_CadastralParcel.gfs +81 -0
- pyogrio/gdal_data/inspire_cp_CadastralZoning.gfs +161 -0
- pyogrio/gdal_data/jpfgdgml_AdmArea.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_AdmBdry.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_AdmPt.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_BldA.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_BldL.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_Cntr.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_CommBdry.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_CommPt.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_Cstline.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_ElevPt.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_GCP.gfs +94 -0
- pyogrio/gdal_data/jpfgdgml_LeveeEdge.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_RailCL.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_RdASL.gfs +44 -0
- pyogrio/gdal_data/jpfgdgml_RdArea.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_RdCompt.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_RdEdg.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_RdMgtBdry.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_RdSgmtA.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_RvrMgtBdry.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_SBAPt.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_SBArea.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_SBBdry.gfs +44 -0
- pyogrio/gdal_data/jpfgdgml_WA.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_WL.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_WStrA.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_WStrL.gfs +54 -0
- pyogrio/gdal_data/nitf_spec.xml +3306 -0
- pyogrio/gdal_data/nitf_spec.xsd +189 -0
- pyogrio/gdal_data/ogrinfo_output.schema.json +528 -0
- pyogrio/gdal_data/ogrvrt.xsd +546 -0
- pyogrio/gdal_data/osmconf.ini +132 -0
- pyogrio/gdal_data/ozi_datum.csv +131 -0
- pyogrio/gdal_data/ozi_ellips.csv +35 -0
- pyogrio/gdal_data/pci_datum.txt +530 -0
- pyogrio/gdal_data/pci_ellips.txt +129 -0
- pyogrio/gdal_data/pdfcomposition.xsd +721 -0
- pyogrio/gdal_data/pds4_template.xml +65 -0
- pyogrio/gdal_data/plscenesconf.json +1985 -0
- pyogrio/gdal_data/ruian_vf_ob_v1.gfs +1455 -0
- pyogrio/gdal_data/ruian_vf_st_uvoh_v1.gfs +86 -0
- pyogrio/gdal_data/ruian_vf_st_v1.gfs +1489 -0
- pyogrio/gdal_data/ruian_vf_v1.gfs +2126 -0
- pyogrio/gdal_data/s57agencies.csv +249 -0
- pyogrio/gdal_data/s57attributes.csv +484 -0
- pyogrio/gdal_data/s57expectedinput.csv +1008 -0
- pyogrio/gdal_data/s57objectclasses.csv +287 -0
- pyogrio/gdal_data/seed_2d.dgn +0 -0
- pyogrio/gdal_data/seed_3d.dgn +0 -0
- pyogrio/gdal_data/stateplane.csv +259 -0
- pyogrio/gdal_data/tms_LINZAntarticaMapTileGrid.json +190 -0
- pyogrio/gdal_data/tms_MapML_APSTILE.json +268 -0
- pyogrio/gdal_data/tms_MapML_CBMTILE.json +346 -0
- pyogrio/gdal_data/tms_NZTM2000.json +243 -0
- pyogrio/gdal_data/trailer.dxf +434 -0
- pyogrio/gdal_data/usage +4 -0
- pyogrio/gdal_data/vcpkg-cmake-wrapper.cmake +23 -0
- pyogrio/gdal_data/vcpkg.spdx.json +264 -0
- pyogrio/gdal_data/vcpkg_abi_info.txt +41 -0
- pyogrio/gdal_data/vdv452.xml +367 -0
- pyogrio/gdal_data/vdv452.xsd +63 -0
- pyogrio/gdal_data/vicar.json +164 -0
- pyogrio/geopandas.py +683 -0
- pyogrio/proj_data/CH +22 -0
- pyogrio/proj_data/GL27 +23 -0
- pyogrio/proj_data/ITRF2000 +24 -0
- pyogrio/proj_data/ITRF2008 +94 -0
- pyogrio/proj_data/ITRF2014 +55 -0
- pyogrio/proj_data/copyright +34 -0
- pyogrio/proj_data/deformation_model.schema.json +582 -0
- pyogrio/proj_data/nad.lst +142 -0
- pyogrio/proj_data/nad27 +810 -0
- pyogrio/proj_data/nad83 +745 -0
- pyogrio/proj_data/other.extra +53 -0
- pyogrio/proj_data/proj-config-version.cmake +44 -0
- pyogrio/proj_data/proj-config.cmake +79 -0
- pyogrio/proj_data/proj-targets-release.cmake +19 -0
- pyogrio/proj_data/proj-targets.cmake +107 -0
- pyogrio/proj_data/proj.db +0 -0
- pyogrio/proj_data/proj.ini +51 -0
- pyogrio/proj_data/proj4-targets-release.cmake +19 -0
- pyogrio/proj_data/proj4-targets.cmake +107 -0
- pyogrio/proj_data/projjson.schema.json +1174 -0
- pyogrio/proj_data/triangulation.schema.json +214 -0
- pyogrio/proj_data/usage +4 -0
- pyogrio/proj_data/vcpkg.spdx.json +198 -0
- pyogrio/proj_data/vcpkg_abi_info.txt +27 -0
- pyogrio/proj_data/world +214 -0
- pyogrio/raw.py +887 -0
- pyogrio/tests/__init__.py +0 -0
- pyogrio/tests/conftest.py +398 -0
- pyogrio/tests/fixtures/README.md +108 -0
- pyogrio/tests/fixtures/curve.gpkg +0 -0
- pyogrio/tests/fixtures/curvepolygon.gpkg +0 -0
- pyogrio/tests/fixtures/line_zm.gpkg +0 -0
- pyogrio/tests/fixtures/multisurface.gpkg +0 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.cpg +1 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf +0 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.prj +1 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp +0 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx +0 -0
- pyogrio/tests/fixtures/sample.osm.pbf +0 -0
- pyogrio/tests/fixtures/test_gpkg_nulls.gpkg +0 -0
- pyogrio/tests/test_arrow.py +1195 -0
- pyogrio/tests/test_core.py +678 -0
- pyogrio/tests/test_geopandas_io.py +2314 -0
- pyogrio/tests/test_path.py +364 -0
- pyogrio/tests/test_raw_io.py +1515 -0
- pyogrio/tests/test_util.py +56 -0
- pyogrio/util.py +247 -0
- pyogrio-0.10.0.dist-info/LICENSE +21 -0
- pyogrio-0.10.0.dist-info/METADATA +129 -0
- pyogrio-0.10.0.dist-info/RECORD +223 -0
- pyogrio-0.10.0.dist-info/WHEEL +5 -0
- pyogrio-0.10.0.dist-info/top_level.txt +1 -0
- pyogrio.libs/libgdal-44263852.so.35.3.9.1 +0 -0
pyogrio/geopandas.py
ADDED
|
@@ -0,0 +1,683 @@
|
|
|
1
|
+
"""Functions for reading and writing GeoPandas dataframes."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import warnings
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from pyogrio._compat import HAS_GEOPANDAS, PANDAS_GE_15, PANDAS_GE_20, PANDAS_GE_22
|
|
9
|
+
from pyogrio.errors import DataSourceError
|
|
10
|
+
from pyogrio.raw import (
|
|
11
|
+
DRIVERS_NO_MIXED_DIMENSIONS,
|
|
12
|
+
DRIVERS_NO_MIXED_SINGLE_MULTI,
|
|
13
|
+
_get_write_path_driver,
|
|
14
|
+
read,
|
|
15
|
+
read_arrow,
|
|
16
|
+
write,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _stringify_path(path):
    """Return the string form of a path-like object; pass anything else through.

    Parameters
    ----------
    path : object
        ``str`` instances are returned unchanged. Any other object exposing
        ``__fspath__`` is converted by calling that method. Every remaining
        object (for example ``bytes`` or ``None``) is returned as-is.

    Returns
    -------
    object
        The result of ``path.__fspath__()`` for non-``str`` path-like objects,
        otherwise ``path`` itself.

    """
    # Only non-str objects implementing the filesystem path protocol are converted.
    if not isinstance(path, str) and hasattr(path, "__fspath__"):
        return path.__fspath__()

    # Strings and all other objects are passed through untouched.
    return path
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _try_parse_datetime(ser):
    """Best-effort conversion of a Series to a datetime dtype at ms precision.

    The values are first passed to ``pandas.to_datetime`` with version-dependent
    keyword arguments. If the result still has ``object`` dtype, parsing is
    retried with ``utc=True``. Any exception raised by pandas is swallowed on
    purpose, so the input (or the partially parsed result) is returned rather
    than raising.

    Parameters
    ----------
    ser : pandas.Series
        Column to parse; presumably holds datetime strings as produced by the
        numpy read path (confirm against caller).

    Returns
    -------
    pandas.Series
        The parsed Series converted / rounded to millisecond precision when
        parsing produced a non-object dtype, otherwise the ``object`` dtype
        Series.

    """
    import pandas as pd  # only called when pandas is known to be installed

    # Keyword arguments for ``pd.to_datetime`` depend on the pandas version.
    if PANDAS_GE_22:
        parse_options = {"format": "ISO8601"}
    elif PANDAS_GE_20:
        parse_options = {"format": "ISO8601", "errors": "ignore"}
    else:
        parse_options = {"yearfirst": True}

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            ".*parsing datetimes with mixed time zones will raise.*",
            FutureWarning,
        )
        # Pre-emptive catch-all for when pandas starts raising instead of
        # warning (the exception type can be tightened once it does).
        try:
            parsed = pd.to_datetime(ser, **parse_options)
        except Exception:
            parsed = ser

    # Still object dtype: try once more, parsing as UTC instead.
    if parsed.dtype == "object":
        try:
            parsed = pd.to_datetime(ser, utc=True, **parse_options)
        except Exception:
            pass

    if parsed.dtype == "object":
        return parsed

    # GDAL only supports ms precision, convert outputs to match.
    # Pandas 2.0 supports datetime[ms] directly; prior versions only support
    # [ns], so the values are rounded to [ms] precision instead.
    if PANDAS_GE_20:
        return parsed.dt.as_unit("ms")
    return parsed.dt.round(freq="ms")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def read_dataframe(
    path_or_buffer,
    /,
    layer=None,
    encoding=None,
    columns=None,
    read_geometry=True,
    force_2d=False,
    skip_features=0,
    max_features=None,
    where=None,
    bbox=None,
    mask=None,
    fids=None,
    sql=None,
    sql_dialect=None,
    fid_as_index=False,
    use_arrow=None,
    on_invalid="raise",
    arrow_to_pandas_kwargs=None,
    **kwargs,
):
    """Read from an OGR data source to a GeoPandas GeoDataFrame or Pandas DataFrame.

    If the data source does not have a geometry column or ``read_geometry`` is False,
    a DataFrame will be returned.

    Requires ``geopandas`` >= 0.8.

    Parameters
    ----------
    path_or_buffer : pathlib.Path or str, or bytes buffer
        A dataset path or URI, raw buffer, or file-like object with a read method.
    layer : int or str, optional (default: first layer)
        If an integer is provided, it corresponds to the index of the layer
        with the data source. If a string is provided, it must match the name
        of the layer in the data source. Defaults to first layer in data source.
    encoding : str, optional (default: None)
        If present, will be used as the encoding for reading string values from
        the data source. By default will automatically try to detect the native
        encoding and decode to ``UTF-8``.
    columns : list-like, optional (default: all columns)
        List of column names to import from the data source. Column names must
        exactly match the names in the data source, and will be returned in
        the order they occur in the data source. To avoid reading any columns,
        pass an empty list-like. If combined with ``where`` parameter, must
        include columns referenced in the ``where`` expression or the data may
        not be correctly read; the data source may return empty results or
        raise an exception (behavior varies by driver).
    read_geometry : bool, optional (default: True)
        If True, will read geometry into a GeoSeries. If False, a Pandas DataFrame
        will be returned instead.
    force_2d : bool, optional (default: False)
        If the geometry has Z values, setting this to True will cause those to
        be ignored and 2D geometries to be returned
    skip_features : int, optional (default: 0)
        Number of features to skip from the beginning of the file before
        returning features. If greater than available number of features, an
        empty DataFrame will be returned. Using this parameter may incur
        significant overhead if the driver does not support the capability to
        randomly seek to a specific feature, because it will need to iterate
        over all prior features.
    max_features : int, optional (default: None)
        Number of features to read from the file.
    where : str, optional (default: None)
        Where clause to filter features in layer by attribute values. If the data source
        natively supports SQL, its specific SQL dialect should be used (eg. SQLite and
        GeoPackage: `SQLITE`_, PostgreSQL). If it doesn't, the `OGRSQL WHERE`_ syntax
        should be used. Note that it is not possible to overrule the SQL dialect, this
        is only possible when you use the ``sql`` parameter.
        Examples: ``"ISO_A3 = 'CAN'"``, ``"POP_EST > 10000000 AND POP_EST < 100000000"``
    bbox : tuple of (xmin, ymin, xmax, ymax) (default: None)
        If present, will be used to filter records whose geometry intersects this
        box. This must be in the same CRS as the dataset. If GEOS is present
        and used by GDAL, only geometries that intersect this bbox will be
        returned; if GEOS is not available or not used by GDAL, all geometries
        with bounding boxes that intersect this bbox will be returned.
        Cannot be combined with ``mask`` keyword.
    mask : Shapely geometry, optional (default: None)
        If present, will be used to filter records whose geometry intersects
        this geometry. This must be in the same CRS as the dataset. If GEOS is
        present and used by GDAL, only geometries that intersect this geometry
        will be returned; if GEOS is not available or not used by GDAL, all
        geometries with bounding boxes that intersect the bounding box of this
        geometry will be returned. Requires Shapely >= 2.0.
        Cannot be combined with ``bbox`` keyword.
    fids : array-like, optional (default: None)
        Array of integer feature id (FID) values to select. Cannot be combined
        with other keywords to select a subset (``skip_features``,
        ``max_features``, ``where``, ``bbox``, ``mask``, or ``sql``). Note that
        the starting index is driver and file specific (e.g. typically 0 for
        Shapefile and 1 for GeoPackage, but can still depend on the specific
        file). The performance of reading a large number of features using FIDs
        is also driver specific and depends on the value of ``use_arrow``. The order
        of the rows returned is undefined. If you would like to sort based on FID, use
        ``fid_as_index=True`` to have the index of the GeoDataFrame returned set to the
        FIDs of the features read. If ``use_arrow=True``, the number of FIDs is limited
        to 4997 for drivers with 'OGRSQL' as default SQL dialect. To read a larger
        number of FIDs, set ``use_arrow=False``.
    sql : str, optional (default: None)
        The SQL statement to execute. Look at the sql_dialect parameter for more
        information on the syntax to use for the query. When combined with other
        keywords like ``columns``, ``skip_features``, ``max_features``,
        ``where``, ``bbox``, or ``mask``, those are applied after the SQL query.
        Be aware that this can have an impact on performance, (e.g. filtering
        with the ``bbox`` or ``mask`` keywords may not use spatial indexes).
        Cannot be combined with the ``layer`` or ``fids`` keywords.
    sql_dialect : str, optional (default: None)
        The SQL dialect the SQL statement is written in. Possible values:

          - **None**: if the data source natively supports SQL, its specific SQL dialect
            will be used by default (eg. SQLite and Geopackage: `SQLITE`_, PostgreSQL).
            If the data source doesn't natively support SQL, the `OGRSQL`_ dialect is
            the default.
          - '`OGRSQL`_': can be used on any data source. Performance can suffer
            when used on data sources with native support for SQL.
          - '`SQLITE`_': can be used on any data source. All spatialite_
            functions can be used. Performance can suffer on data sources with
            native support for SQL, except for Geopackage and SQLite as this is
            their native SQL dialect.

    fid_as_index : bool, optional (default: False)
        If True, will use the FIDs of the features that were read as the
        index of the GeoDataFrame. May start at 0 or 1 depending on the driver.
    use_arrow : bool, optional (default: False)
        Whether to use Arrow as the transfer mechanism of the read data
        from GDAL to Python (requires GDAL >= 3.6 and `pyarrow` to be
        installed). When enabled, this provides a further speed-up.
        Defaults to False, but this default can also be globally overridden
        by setting the ``PYOGRIO_USE_ARROW=1`` environment variable.
    on_invalid : str, optional (default: "raise")
        The action to take when an invalid geometry is encountered. Possible
        values:

        - **raise**: an exception will be raised if a WKB input geometry is
          invalid.
        - **warn**: invalid WKB geometries will be returned as ``None`` and a
          warning will be raised.
        - **ignore**: invalid WKB geometries will be returned as ``None``
          without a warning.

    arrow_to_pandas_kwargs : dict, optional (default: None)
        When `use_arrow` is True, these kwargs will be passed to the `to_pandas`_
        call for the arrow to pandas conversion.
    **kwargs
        Additional driver-specific dataset open options passed to OGR. Invalid
        options will trigger a warning.

    Returns
    -------
    GeoDataFrame or DataFrame (if no geometry is present)

    .. _OGRSQL:

        https://gdal.org/user/ogr_sql_dialect.html#ogr-sql-dialect

    .. _OGRSQL WHERE:

        https://gdal.org/user/ogr_sql_dialect.html#where

    .. _SQLITE:

        https://gdal.org/user/sql_sqlite_dialect.html#sql-sqlite-dialect

    .. _spatialite:

        https://www.gaia-gis.it/gaia-sins/spatialite-sql-latest.html

    .. _to_pandas:

        https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.to_pandas

    """
    if not HAS_GEOPANDAS:
        raise ImportError("geopandas is required to use pyogrio.read_dataframe()")

    import geopandas as gp
    import pandas as pd

    import shapely  # if geopandas is present, shapely is expected to be present

    path_or_buffer = _stringify_path(path_or_buffer)

    # No explicit choice: fall back to the PYOGRIO_USE_ARROW environment variable.
    if use_arrow is None:
        use_arrow = bool(int(os.environ.get("PYOGRIO_USE_ARROW", "0")))

    if use_arrow:
        reader = read_arrow
        # On the arrow path force_2d is never delegated to the reader; it is
        # applied further down with shapely once the WKB has been decoded.
        reader_force_2d = False
    else:
        reader = read
        reader_force_2d = force_2d
        # For arrow, datetimes are read as is.
        # For numpy IO, datetimes are read as string values to preserve timezone
        # info as numpy does not directly support timezones.
        kwargs["datetime_as_string"] = True

    result = reader(
        path_or_buffer,
        layer=layer,
        encoding=encoding,
        columns=columns,
        read_geometry=read_geometry,
        force_2d=reader_force_2d,
        skip_features=skip_features,
        max_features=max_features,
        where=where,
        bbox=bbox,
        mask=mask,
        fids=fids,
        sql=sql,
        sql_dialect=sql_dialect,
        return_fids=fid_as_index,
        **kwargs,
    )

    if not use_arrow:
        # numpy path: assemble the DataFrame from the per-field arrays.
        meta, fid_values, geometry, field_data = result

        field_names = meta["fields"].tolist()
        data = {name: field_data[i] for i, name in enumerate(field_names)}
        frame_index = pd.Index(fid_values, name="fid") if fid_as_index else None
        df = pd.DataFrame(data, columns=field_names, index=frame_index)

        # Datetime fields were requested as strings above; parse them now.
        for dtype, column_name in zip(meta["dtypes"], df.columns):
            if dtype.startswith("datetime"):
                df[column_name] = _try_parse_datetime(df[column_name])

        if geometry is None or not read_geometry:
            return df

        geometry = shapely.from_wkb(geometry, on_invalid=on_invalid)
        return gp.GeoDataFrame(df, geometry=geometry, crs=meta["crs"])

    # arrow path
    meta, table = result

    # self_destruct decreases memory usage, but has as side effect that
    # accessing table afterwards causes a crash, so del table to avoid that.
    to_pandas_kwargs = {"self_destruct": True}
    if arrow_to_pandas_kwargs is not None:
        to_pandas_kwargs.update(arrow_to_pandas_kwargs)
    df = table.to_pandas(**to_pandas_kwargs)
    del table

    if fid_as_index:
        df = df.set_index(meta["fid_column"])
        df.index.names = ["fid"]

    geometry_name = meta["geometry_name"] or "wkb_geometry"

    if len(df.columns) == 0 and not fid_as_index:
        # Index not asked, no geometry column and no attribute columns: return empty
        return pd.DataFrame()

    if geometry_name not in df.columns:
        return df

    wkb_values = df.pop(geometry_name)
    if PANDAS_GE_15 and wkb_values.dtype != object:
        # for example ArrowDtype will otherwise create numpy array with pd.NA
        wkb_values = wkb_values.to_numpy(na_value=None)
    df["geometry"] = shapely.from_wkb(wkb_values, on_invalid=on_invalid)
    if force_2d:
        df["geometry"] = shapely.force_2d(df["geometry"])
    return gp.GeoDataFrame(df, geometry="geometry", crs=meta["crs"])
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
# TODO: handle index properly
|
|
337
|
+
def write_dataframe(
|
|
338
|
+
df,
|
|
339
|
+
path,
|
|
340
|
+
layer=None,
|
|
341
|
+
driver=None,
|
|
342
|
+
encoding=None,
|
|
343
|
+
geometry_type=None,
|
|
344
|
+
promote_to_multi=None,
|
|
345
|
+
nan_as_null=True,
|
|
346
|
+
append=False,
|
|
347
|
+
use_arrow=None,
|
|
348
|
+
dataset_metadata=None,
|
|
349
|
+
layer_metadata=None,
|
|
350
|
+
metadata=None,
|
|
351
|
+
dataset_options=None,
|
|
352
|
+
layer_options=None,
|
|
353
|
+
**kwargs,
|
|
354
|
+
):
|
|
355
|
+
"""Write GeoPandas GeoDataFrame to an OGR file format.
|
|
356
|
+
|
|
357
|
+
Parameters
|
|
358
|
+
----------
|
|
359
|
+
df : GeoDataFrame or DataFrame
|
|
360
|
+
The data to write. For attribute columns of the "object" dtype,
|
|
361
|
+
all values will be converted to strings to be written to the
|
|
362
|
+
output file, except None and np.nan, which will be set to NULL
|
|
363
|
+
in the output file.
|
|
364
|
+
path : str or io.BytesIO
|
|
365
|
+
path to output file on writeable file system or an io.BytesIO object to
|
|
366
|
+
allow writing to memory. Will raise NotImplementedError if an open file
|
|
367
|
+
handle is passed; use BytesIO instead.
|
|
368
|
+
NOTE: support for writing to memory is limited to specific drivers.
|
|
369
|
+
layer : str, optional (default: None)
|
|
370
|
+
layer name to create. If writing to memory and layer name is not
|
|
371
|
+
provided, the layer name will be set to a UUID4 value.
|
|
372
|
+
driver : string, optional (default: None)
|
|
373
|
+
The OGR format driver used to write the vector file. By default attempts
|
|
374
|
+
to infer driver from path. Must be provided to write to memory.
|
|
375
|
+
encoding : str, optional (default: None)
|
|
376
|
+
If present, will be used as the encoding for writing string values to
|
|
377
|
+
the file. Use with caution, only certain drivers support encodings
|
|
378
|
+
other than UTF-8.
|
|
379
|
+
geometry_type : string, optional (default: None)
|
|
380
|
+
By default, the geometry type of the layer will be inferred from the
|
|
381
|
+
data, after applying the promote_to_multi logic. If the data only contains a
|
|
382
|
+
single geometry type (after applying the logic of promote_to_multi), this type
|
|
383
|
+
is used for the layer. If the data (still) contains mixed geometry types, the
|
|
384
|
+
output layer geometry type will be set to "Unknown".
|
|
385
|
+
|
|
386
|
+
This parameter does not modify the geometry, but it will try to force the layer
|
|
387
|
+
type of the output file to this value. Use this parameter with caution because
|
|
388
|
+
using a non-default layer geometry type may result in errors when writing the
|
|
389
|
+
file, may be ignored by the driver, or may result in invalid files. Possible
|
|
390
|
+
values are: "Unknown", "Point", "LineString", "Polygon", "MultiPoint",
|
|
391
|
+
"MultiLineString", "MultiPolygon" or "GeometryCollection".
|
|
392
|
+
promote_to_multi : bool, optional (default: None)
|
|
393
|
+
If True, will convert singular geometry types in the data to their
|
|
394
|
+
corresponding multi geometry type for writing. By default, will convert
|
|
395
|
+
mixed singular and multi geometry types to multi geometry types for drivers
|
|
396
|
+
that do not support mixed singular and multi geometry types. If False, geometry
|
|
397
|
+
types will not be promoted, which may result in errors or invalid files when
|
|
398
|
+
attempting to write mixed singular and multi geometry types to drivers that do
|
|
399
|
+
not support such combinations.
|
|
400
|
+
nan_as_null : bool, default True
|
|
401
|
+
For floating point columns (float32 / float64), whether NaN values are
|
|
402
|
+
written as "null" (missing value). Defaults to True because in pandas
|
|
403
|
+
NaNs are typically used as missing value. Note that when set to False,
|
|
404
|
+
behaviour is format specific: some formats don't support NaNs by
|
|
405
|
+
default (e.g. GeoJSON will skip this property) or might treat them as
|
|
406
|
+
null anyway (e.g. GeoPackage).
|
|
407
|
+
append : bool, optional (default: False)
|
|
408
|
+
If True, the data source specified by path already exists, and the
|
|
409
|
+
driver supports appending to an existing data source, will cause the
|
|
410
|
+
data to be appended to the existing records in the data source. Not
|
|
411
|
+
supported for writing to in-memory files.
|
|
412
|
+
NOTE: append support is limited to specific drivers and GDAL versions.
|
|
413
|
+
use_arrow : bool, optional (default: False)
|
|
414
|
+
Whether to use Arrow as the transfer mechanism of the data to write
|
|
415
|
+
from Python to GDAL (requires GDAL >= 3.8 and `pyarrow` to be
|
|
416
|
+
installed). When enabled, this provides a further speed-up.
|
|
417
|
+
Defaults to False, but this default can also be globally overridden
|
|
418
|
+
by setting the ``PYOGRIO_USE_ARROW=1`` environment variable.
|
|
419
|
+
Using Arrow does not support writing an object-dtype column with
|
|
420
|
+
mixed types.
|
|
421
|
+
dataset_metadata : dict, optional (default: None)
|
|
422
|
+
Metadata to be stored at the dataset level in the output file; limited
|
|
423
|
+
to drivers that support writing metadata, such as GPKG, and silently
|
|
424
|
+
ignored otherwise. Keys and values must be strings.
|
|
425
|
+
layer_metadata : dict, optional (default: None)
|
|
426
|
+
Metadata to be stored at the layer level in the output file; limited to
|
|
427
|
+
drivers that support writing metadata, such as GPKG, and silently
|
|
428
|
+
ignored otherwise. Keys and values must be strings.
|
|
429
|
+
metadata : dict, optional (default: None)
|
|
430
|
+
alias of layer_metadata
|
|
431
|
+
dataset_options : dict, optional
|
|
432
|
+
Dataset creation options (format specific) passed to OGR. Specify as
|
|
433
|
+
a key-value dictionary.
|
|
434
|
+
layer_options : dict, optional
|
|
435
|
+
Layer creation options (format specific) passed to OGR. Specify as
|
|
436
|
+
a key-value dictionary.
|
|
437
|
+
**kwargs
|
|
438
|
+
Additional driver-specific dataset or layer creation options passed
|
|
439
|
+
to OGR. pyogrio will attempt to automatically pass those keywords
|
|
440
|
+
either as dataset or as layer creation option based on the known
|
|
441
|
+
options for the specific driver. Alternatively, you can use the
|
|
442
|
+
explicit `dataset_options` or `layer_options` keywords to manually
|
|
443
|
+
do this (for example if an option exists as both dataset and layer
|
|
444
|
+
option).
|
|
445
|
+
|
|
446
|
+
"""
|
|
447
|
+
# TODO: add examples to the docstring (e.g. OGR kwargs)
|
|
448
|
+
|
|
449
|
+
if not HAS_GEOPANDAS:
|
|
450
|
+
raise ImportError("geopandas is required to use pyogrio.write_dataframe()")
|
|
451
|
+
|
|
452
|
+
import pandas as pd
|
|
453
|
+
from geopandas.array import to_wkb
|
|
454
|
+
|
|
455
|
+
if not isinstance(df, pd.DataFrame):
|
|
456
|
+
raise ValueError("'df' must be a DataFrame or GeoDataFrame")
|
|
457
|
+
|
|
458
|
+
if use_arrow is None:
|
|
459
|
+
use_arrow = bool(int(os.environ.get("PYOGRIO_USE_ARROW", "0")))
|
|
460
|
+
path, driver = _get_write_path_driver(path, driver, append=append)
|
|
461
|
+
|
|
462
|
+
geometry_columns = df.columns[df.dtypes == "geometry"]
|
|
463
|
+
if len(geometry_columns) > 1:
|
|
464
|
+
raise ValueError(
|
|
465
|
+
"'df' must have only one geometry column. "
|
|
466
|
+
"Multiple geometry columns are not supported for output using OGR."
|
|
467
|
+
)
|
|
468
|
+
|
|
469
|
+
if len(geometry_columns) > 0:
|
|
470
|
+
geometry_column = geometry_columns[0]
|
|
471
|
+
geometry = df[geometry_column]
|
|
472
|
+
fields = [c for c in df.columns if not c == geometry_column]
|
|
473
|
+
else:
|
|
474
|
+
geometry_column = None
|
|
475
|
+
geometry = None
|
|
476
|
+
fields = list(df.columns)
|
|
477
|
+
|
|
478
|
+
# TODO: may need to fill in pd.NA, etc
|
|
479
|
+
field_data = []
|
|
480
|
+
field_mask = []
|
|
481
|
+
# dict[str, np.array(int)] special case for dt-tz fields
|
|
482
|
+
gdal_tz_offsets = {}
|
|
483
|
+
for name in fields:
|
|
484
|
+
col = df[name]
|
|
485
|
+
if isinstance(col.dtype, pd.DatetimeTZDtype):
|
|
486
|
+
# Deal with datetimes with timezones by passing down timezone separately
|
|
487
|
+
# pass down naive datetime
|
|
488
|
+
naive = col.dt.tz_localize(None)
|
|
489
|
+
values = naive.values
|
|
490
|
+
# compute offset relative to UTC explicitly
|
|
491
|
+
tz_offset = naive - col.dt.tz_convert("UTC").dt.tz_localize(None)
|
|
492
|
+
# Convert to GDAL timezone offset representation.
|
|
493
|
+
# GMT is represented as 100 and offsets are represented by adding /
|
|
494
|
+
# subtracting 1 for every 15 minutes different from GMT.
|
|
495
|
+
# https://gdal.org/development/rfc/rfc56_millisecond_precision.html#core-changes
|
|
496
|
+
# Convert each row offset to a signed multiple of 15m and add to GMT value
|
|
497
|
+
gdal_offset_representation = tz_offset // pd.Timedelta("15m") + 100
|
|
498
|
+
gdal_tz_offsets[name] = gdal_offset_representation.values
|
|
499
|
+
else:
|
|
500
|
+
values = col.values
|
|
501
|
+
if isinstance(values, pd.api.extensions.ExtensionArray):
|
|
502
|
+
from pandas.arrays import BooleanArray, FloatingArray, IntegerArray
|
|
503
|
+
|
|
504
|
+
if isinstance(values, (IntegerArray, FloatingArray, BooleanArray)):
|
|
505
|
+
field_data.append(values._data)
|
|
506
|
+
field_mask.append(values._mask)
|
|
507
|
+
else:
|
|
508
|
+
field_data.append(np.asarray(values))
|
|
509
|
+
field_mask.append(np.asarray(values.isna()))
|
|
510
|
+
else:
|
|
511
|
+
field_data.append(values)
|
|
512
|
+
field_mask.append(None)
|
|
513
|
+
|
|
514
|
+
# Determine geometry_type and/or promote_to_multi
|
|
515
|
+
if geometry_column is not None:
|
|
516
|
+
geometry_types_all = geometry.geom_type
|
|
517
|
+
|
|
518
|
+
if geometry_column is not None and (
|
|
519
|
+
geometry_type is None or promote_to_multi is None
|
|
520
|
+
):
|
|
521
|
+
tmp_geometry_type = "Unknown"
|
|
522
|
+
has_z = False
|
|
523
|
+
|
|
524
|
+
# If there is data, infer layer geometry type + promote_to_multi
|
|
525
|
+
if not df.empty:
|
|
526
|
+
# None/Empty geometries sometimes report as Z incorrectly, so ignore them
|
|
527
|
+
with warnings.catch_warnings():
|
|
528
|
+
warnings.filterwarnings("ignore", r"GeoSeries\.notna", UserWarning)
|
|
529
|
+
geometry_notna = geometry.notna()
|
|
530
|
+
has_z_arr = geometry[geometry_notna & (~geometry.is_empty)].has_z
|
|
531
|
+
has_z = has_z_arr.any()
|
|
532
|
+
all_z = has_z_arr.all()
|
|
533
|
+
|
|
534
|
+
if driver in DRIVERS_NO_MIXED_DIMENSIONS and has_z and not all_z:
|
|
535
|
+
raise DataSourceError(
|
|
536
|
+
f"Mixed 2D and 3D coordinates are not supported by {driver}"
|
|
537
|
+
)
|
|
538
|
+
|
|
539
|
+
geometry_types = pd.Series(geometry_types_all.unique()).dropna().values
|
|
540
|
+
if len(geometry_types) == 1:
|
|
541
|
+
tmp_geometry_type = geometry_types[0]
|
|
542
|
+
if promote_to_multi and tmp_geometry_type in (
|
|
543
|
+
"Point",
|
|
544
|
+
"LineString",
|
|
545
|
+
"Polygon",
|
|
546
|
+
):
|
|
547
|
+
tmp_geometry_type = f"Multi{tmp_geometry_type}"
|
|
548
|
+
elif len(geometry_types) == 2:
|
|
549
|
+
# Check if the types are corresponding multi + single types
|
|
550
|
+
if "Polygon" in geometry_types and "MultiPolygon" in geometry_types:
|
|
551
|
+
multi_type = "MultiPolygon"
|
|
552
|
+
elif (
|
|
553
|
+
"LineString" in geometry_types
|
|
554
|
+
and "MultiLineString" in geometry_types
|
|
555
|
+
):
|
|
556
|
+
multi_type = "MultiLineString"
|
|
557
|
+
elif "Point" in geometry_types and "MultiPoint" in geometry_types:
|
|
558
|
+
multi_type = "MultiPoint"
|
|
559
|
+
else:
|
|
560
|
+
multi_type = None
|
|
561
|
+
|
|
562
|
+
# If they are corresponding multi + single types
|
|
563
|
+
if multi_type is not None:
|
|
564
|
+
if (
|
|
565
|
+
promote_to_multi is None
|
|
566
|
+
and driver in DRIVERS_NO_MIXED_SINGLE_MULTI
|
|
567
|
+
):
|
|
568
|
+
promote_to_multi = True
|
|
569
|
+
if promote_to_multi:
|
|
570
|
+
tmp_geometry_type = multi_type
|
|
571
|
+
|
|
572
|
+
if geometry_type is None:
|
|
573
|
+
geometry_type = tmp_geometry_type
|
|
574
|
+
if has_z and geometry_type != "Unknown":
|
|
575
|
+
geometry_type = f"{geometry_type} Z"
|
|
576
|
+
|
|
577
|
+
crs = None
|
|
578
|
+
if geometry_column is not None and geometry.crs:
|
|
579
|
+
# TODO: this may need to be WKT1, due to issues
|
|
580
|
+
# if possible use EPSG codes instead
|
|
581
|
+
epsg = geometry.crs.to_epsg()
|
|
582
|
+
if epsg:
|
|
583
|
+
crs = f"EPSG:{epsg}"
|
|
584
|
+
else:
|
|
585
|
+
crs = geometry.crs.to_wkt("WKT1_GDAL")
|
|
586
|
+
|
|
587
|
+
if use_arrow:
|
|
588
|
+
import pyarrow as pa
|
|
589
|
+
|
|
590
|
+
from pyogrio.raw import write_arrow
|
|
591
|
+
|
|
592
|
+
if geometry_column is not None:
|
|
593
|
+
# Convert to multi type
|
|
594
|
+
if promote_to_multi:
|
|
595
|
+
import shapely
|
|
596
|
+
|
|
597
|
+
mask_points = geometry_types_all == "Point"
|
|
598
|
+
mask_linestrings = geometry_types_all == "LineString"
|
|
599
|
+
mask_polygons = geometry_types_all == "Polygon"
|
|
600
|
+
|
|
601
|
+
if mask_points.any():
|
|
602
|
+
geometry[mask_points] = shapely.multipoints(
|
|
603
|
+
np.atleast_2d(geometry[mask_points]), axis=0
|
|
604
|
+
)
|
|
605
|
+
|
|
606
|
+
if mask_linestrings.any():
|
|
607
|
+
geometry[mask_linestrings] = shapely.multilinestrings(
|
|
608
|
+
np.atleast_2d(geometry[mask_linestrings]), axis=0
|
|
609
|
+
)
|
|
610
|
+
|
|
611
|
+
if mask_polygons.any():
|
|
612
|
+
geometry[mask_polygons] = shapely.multipolygons(
|
|
613
|
+
np.atleast_2d(geometry[mask_polygons]), axis=0
|
|
614
|
+
)
|
|
615
|
+
|
|
616
|
+
geometry = to_wkb(geometry.values)
|
|
617
|
+
df = df.copy(deep=False)
|
|
618
|
+
# convert to plain DataFrame to avoid warning from geopandas about
|
|
619
|
+
# writing non-geometries to the geometry column
|
|
620
|
+
df = pd.DataFrame(df, copy=False)
|
|
621
|
+
df[geometry_column] = geometry
|
|
622
|
+
|
|
623
|
+
table = pa.Table.from_pandas(df, preserve_index=False)
|
|
624
|
+
|
|
625
|
+
if geometry_column is not None:
|
|
626
|
+
# ensure that the geometry column is binary (for all-null geometries,
|
|
627
|
+
# this could be a wrong type)
|
|
628
|
+
geom_field = table.schema.field(geometry_column)
|
|
629
|
+
if not (
|
|
630
|
+
pa.types.is_binary(geom_field.type)
|
|
631
|
+
or pa.types.is_large_binary(geom_field.type)
|
|
632
|
+
):
|
|
633
|
+
table = table.set_column(
|
|
634
|
+
table.schema.get_field_index(geometry_column),
|
|
635
|
+
geom_field.with_type(pa.binary()),
|
|
636
|
+
table[geometry_column].cast(pa.binary()),
|
|
637
|
+
)
|
|
638
|
+
|
|
639
|
+
write_arrow(
|
|
640
|
+
table,
|
|
641
|
+
path,
|
|
642
|
+
layer=layer,
|
|
643
|
+
driver=driver,
|
|
644
|
+
geometry_name=geometry_column,
|
|
645
|
+
geometry_type=geometry_type,
|
|
646
|
+
crs=crs,
|
|
647
|
+
encoding=encoding,
|
|
648
|
+
append=append,
|
|
649
|
+
dataset_metadata=dataset_metadata,
|
|
650
|
+
layer_metadata=layer_metadata,
|
|
651
|
+
metadata=metadata,
|
|
652
|
+
dataset_options=dataset_options,
|
|
653
|
+
layer_options=layer_options,
|
|
654
|
+
**kwargs,
|
|
655
|
+
)
|
|
656
|
+
return
|
|
657
|
+
|
|
658
|
+
# If there is geometry data, prepare it to be written
|
|
659
|
+
if geometry_column is not None:
|
|
660
|
+
geometry = to_wkb(geometry.values)
|
|
661
|
+
|
|
662
|
+
write(
|
|
663
|
+
path,
|
|
664
|
+
layer=layer,
|
|
665
|
+
driver=driver,
|
|
666
|
+
geometry=geometry,
|
|
667
|
+
field_data=field_data,
|
|
668
|
+
field_mask=field_mask,
|
|
669
|
+
fields=fields,
|
|
670
|
+
crs=crs,
|
|
671
|
+
geometry_type=geometry_type,
|
|
672
|
+
encoding=encoding,
|
|
673
|
+
promote_to_multi=promote_to_multi,
|
|
674
|
+
nan_as_null=nan_as_null,
|
|
675
|
+
append=append,
|
|
676
|
+
dataset_metadata=dataset_metadata,
|
|
677
|
+
layer_metadata=layer_metadata,
|
|
678
|
+
metadata=metadata,
|
|
679
|
+
dataset_options=dataset_options,
|
|
680
|
+
layer_options=layer_options,
|
|
681
|
+
gdal_tz_offsets=gdal_tz_offsets,
|
|
682
|
+
**kwargs,
|
|
683
|
+
)
|