pyogrio 0.12.0__cp314-cp314t-macosx_12_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyogrio/.dylibs/libgdal.37.3.11.4.dylib +0 -0
- pyogrio/__init__.py +57 -0
- pyogrio/_compat.py +54 -0
- pyogrio/_env.py +59 -0
- pyogrio/_err.cpython-314t-darwin.so +0 -0
- pyogrio/_geometry.cpython-314t-darwin.so +0 -0
- pyogrio/_io.cpython-314t-darwin.so +0 -0
- pyogrio/_ogr.cpython-314t-darwin.so +0 -0
- pyogrio/_version.py +21 -0
- pyogrio/_vsi.cpython-314t-darwin.so +0 -0
- pyogrio/core.py +387 -0
- pyogrio/errors.py +25 -0
- pyogrio/gdal_data/GDAL-targets-release.cmake +19 -0
- pyogrio/gdal_data/GDAL-targets.cmake +106 -0
- pyogrio/gdal_data/GDALConfig.cmake +24 -0
- pyogrio/gdal_data/GDALConfigVersion.cmake +65 -0
- pyogrio/gdal_data/GDALLogoBW.svg +138 -0
- pyogrio/gdal_data/GDALLogoColor.svg +126 -0
- pyogrio/gdal_data/GDALLogoGS.svg +126 -0
- pyogrio/gdal_data/LICENSE.TXT +467 -0
- pyogrio/gdal_data/MM_m_idofic.csv +321 -0
- pyogrio/gdal_data/copyright +467 -0
- pyogrio/gdal_data/cubewerx_extra.wkt +48 -0
- pyogrio/gdal_data/default.rsc +0 -0
- pyogrio/gdal_data/ecw_cs.wkt +1453 -0
- pyogrio/gdal_data/eedaconf.json +23 -0
- pyogrio/gdal_data/epsg.wkt +1 -0
- pyogrio/gdal_data/esri_StatePlane_extra.wkt +631 -0
- pyogrio/gdal_data/gdal_algorithm.schema.json +220 -0
- pyogrio/gdal_data/gdalg.schema.json +36 -0
- pyogrio/gdal_data/gdalicon.png +0 -0
- pyogrio/gdal_data/gdalinfo_output.schema.json +390 -0
- pyogrio/gdal_data/gdalmdiminfo_output.schema.json +326 -0
- pyogrio/gdal_data/gdaltileindex.xsd +253 -0
- pyogrio/gdal_data/gdalvrt.xsd +927 -0
- pyogrio/gdal_data/gfs.xsd +246 -0
- pyogrio/gdal_data/gml_registry.xml +117 -0
- pyogrio/gdal_data/gml_registry.xsd +66 -0
- pyogrio/gdal_data/grib2_center.csv +251 -0
- pyogrio/gdal_data/grib2_process.csv +102 -0
- pyogrio/gdal_data/grib2_subcenter.csv +63 -0
- pyogrio/gdal_data/grib2_table_4_2_0_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_13.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_14.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_15.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_16.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_17.csv +11 -0
- pyogrio/gdal_data/grib2_table_4_2_0_18.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_19.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_190.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_191.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_20.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_21.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_3.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_4.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_5.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_6.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_0_7.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_191.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_3.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_10_4.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_1_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_1_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_1_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_20_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_20_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_20_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_3.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_4.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_5.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_2_6.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_3_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_3_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_3_2.csv +28 -0
- pyogrio/gdal_data/grib2_table_4_2_3_3.csv +8 -0
- pyogrio/gdal_data/grib2_table_4_2_3_4.csv +14 -0
- pyogrio/gdal_data/grib2_table_4_2_3_5.csv +11 -0
- pyogrio/gdal_data/grib2_table_4_2_3_6.csv +11 -0
- pyogrio/gdal_data/grib2_table_4_2_4_0.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_1.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_10.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_2.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_3.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_4.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_5.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_6.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_7.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_8.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_4_9.csv +261 -0
- pyogrio/gdal_data/grib2_table_4_2_local_Canada.csv +5 -0
- pyogrio/gdal_data/grib2_table_4_2_local_HPC.csv +2 -0
- pyogrio/gdal_data/grib2_table_4_2_local_MRMS.csv +175 -0
- pyogrio/gdal_data/grib2_table_4_2_local_NCEP.csv +401 -0
- pyogrio/gdal_data/grib2_table_4_2_local_NDFD.csv +38 -0
- pyogrio/gdal_data/grib2_table_4_2_local_index.csv +7 -0
- pyogrio/gdal_data/grib2_table_4_5.csv +261 -0
- pyogrio/gdal_data/grib2_table_versions.csv +3 -0
- pyogrio/gdal_data/gt_datum.csv +229 -0
- pyogrio/gdal_data/gt_ellips.csv +24 -0
- pyogrio/gdal_data/header.dxf +1124 -0
- pyogrio/gdal_data/inspire_cp_BasicPropertyUnit.gfs +57 -0
- pyogrio/gdal_data/inspire_cp_CadastralBoundary.gfs +60 -0
- pyogrio/gdal_data/inspire_cp_CadastralParcel.gfs +81 -0
- pyogrio/gdal_data/inspire_cp_CadastralZoning.gfs +161 -0
- pyogrio/gdal_data/jpfgdgml_AdmArea.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_AdmBdry.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_AdmPt.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_BldA.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_BldL.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_Cntr.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_CommBdry.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_CommPt.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_Cstline.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_ElevPt.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_GCP.gfs +94 -0
- pyogrio/gdal_data/jpfgdgml_LeveeEdge.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_RailCL.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_RdASL.gfs +44 -0
- pyogrio/gdal_data/jpfgdgml_RdArea.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_RdCompt.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_RdEdg.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_RdMgtBdry.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_RdSgmtA.gfs +59 -0
- pyogrio/gdal_data/jpfgdgml_RvrMgtBdry.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_SBAPt.gfs +49 -0
- pyogrio/gdal_data/jpfgdgml_SBArea.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_SBBdry.gfs +44 -0
- pyogrio/gdal_data/jpfgdgml_WA.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_WL.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_WStrA.gfs +54 -0
- pyogrio/gdal_data/jpfgdgml_WStrL.gfs +54 -0
- pyogrio/gdal_data/leaflet_template.html +102 -0
- pyogrio/gdal_data/nitf_spec.xml +3288 -0
- pyogrio/gdal_data/nitf_spec.xsd +171 -0
- pyogrio/gdal_data/ogr_fields_override.schema.json +125 -0
- pyogrio/gdal_data/ogrinfo_output.schema.json +528 -0
- pyogrio/gdal_data/ogrvrt.xsd +528 -0
- pyogrio/gdal_data/osmconf.ini +134 -0
- pyogrio/gdal_data/ozi_datum.csv +131 -0
- pyogrio/gdal_data/ozi_ellips.csv +35 -0
- pyogrio/gdal_data/pci_datum.txt +530 -0
- pyogrio/gdal_data/pci_ellips.txt +129 -0
- pyogrio/gdal_data/pdfcomposition.xsd +703 -0
- pyogrio/gdal_data/pds4_template.xml +65 -0
- pyogrio/gdal_data/plscenesconf.json +1985 -0
- pyogrio/gdal_data/ruian_vf_ob_v1.gfs +1455 -0
- pyogrio/gdal_data/ruian_vf_st_uvoh_v1.gfs +86 -0
- pyogrio/gdal_data/ruian_vf_st_v1.gfs +1489 -0
- pyogrio/gdal_data/ruian_vf_v1.gfs +2126 -0
- pyogrio/gdal_data/s57agencies.csv +249 -0
- pyogrio/gdal_data/s57attributes.csv +484 -0
- pyogrio/gdal_data/s57expectedinput.csv +1008 -0
- pyogrio/gdal_data/s57objectclasses.csv +287 -0
- pyogrio/gdal_data/seed_2d.dgn +0 -0
- pyogrio/gdal_data/seed_3d.dgn +0 -0
- pyogrio/gdal_data/stateplane.csv +259 -0
- pyogrio/gdal_data/template_tiles.mapml +28 -0
- pyogrio/gdal_data/tms_LINZAntarticaMapTileGrid.json +190 -0
- pyogrio/gdal_data/tms_MapML_APSTILE.json +268 -0
- pyogrio/gdal_data/tms_MapML_CBMTILE.json +346 -0
- pyogrio/gdal_data/tms_NZTM2000.json +243 -0
- pyogrio/gdal_data/trailer.dxf +434 -0
- pyogrio/gdal_data/usage +4 -0
- pyogrio/gdal_data/vcpkg-cmake-wrapper.cmake +23 -0
- pyogrio/gdal_data/vcpkg.spdx.json +291 -0
- pyogrio/gdal_data/vcpkg_abi_info.txt +45 -0
- pyogrio/gdal_data/vdv452.xml +349 -0
- pyogrio/gdal_data/vdv452.xsd +45 -0
- pyogrio/gdal_data/vicar.json +164 -0
- pyogrio/geopandas.py +978 -0
- pyogrio/proj_data/CH +22 -0
- pyogrio/proj_data/GL27 +23 -0
- pyogrio/proj_data/ITRF2000 +24 -0
- pyogrio/proj_data/ITRF2008 +94 -0
- pyogrio/proj_data/ITRF2014 +55 -0
- pyogrio/proj_data/ITRF2020 +91 -0
- pyogrio/proj_data/copyright +34 -0
- pyogrio/proj_data/deformation_model.schema.json +582 -0
- pyogrio/proj_data/nad.lst +142 -0
- pyogrio/proj_data/nad27 +810 -0
- pyogrio/proj_data/nad83 +745 -0
- pyogrio/proj_data/other.extra +53 -0
- pyogrio/proj_data/proj-config-version.cmake +44 -0
- pyogrio/proj_data/proj-config.cmake +79 -0
- pyogrio/proj_data/proj-targets-release.cmake +19 -0
- pyogrio/proj_data/proj-targets.cmake +107 -0
- pyogrio/proj_data/proj.db +0 -0
- pyogrio/proj_data/proj.ini +59 -0
- pyogrio/proj_data/proj4-targets-release.cmake +19 -0
- pyogrio/proj_data/proj4-targets.cmake +107 -0
- pyogrio/proj_data/projjson.schema.json +1174 -0
- pyogrio/proj_data/triangulation.schema.json +214 -0
- pyogrio/proj_data/usage +9 -0
- pyogrio/proj_data/vcpkg.spdx.json +203 -0
- pyogrio/proj_data/vcpkg_abi_info.txt +28 -0
- pyogrio/proj_data/world +214 -0
- pyogrio/raw.py +897 -0
- pyogrio/tests/__init__.py +0 -0
- pyogrio/tests/conftest.py +588 -0
- pyogrio/tests/fixtures/README.md +108 -0
- pyogrio/tests/fixtures/curve.gpkg +0 -0
- pyogrio/tests/fixtures/curvepolygon.gpkg +0 -0
- pyogrio/tests/fixtures/line_zm.gpkg +0 -0
- pyogrio/tests/fixtures/list_field_values_file.parquet +0 -0
- pyogrio/tests/fixtures/list_nested_struct_file.parquet +0 -0
- pyogrio/tests/fixtures/multisurface.gpkg +0 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.cpg +1 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.dbf +0 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.prj +1 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp +0 -0
- pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shx +0 -0
- pyogrio/tests/fixtures/sample.osm.pbf +0 -0
- pyogrio/tests/fixtures/test_gpkg_nulls.gpkg +0 -0
- pyogrio/tests/test_arrow.py +1160 -0
- pyogrio/tests/test_core.py +702 -0
- pyogrio/tests/test_geopandas_io.py +3218 -0
- pyogrio/tests/test_path.py +374 -0
- pyogrio/tests/test_raw_io.py +1473 -0
- pyogrio/tests/test_util.py +56 -0
- pyogrio/util.py +258 -0
- pyogrio-0.12.0.dist-info/METADATA +125 -0
- pyogrio-0.12.0.dist-info/RECORD +231 -0
- pyogrio-0.12.0.dist-info/WHEEL +6 -0
- pyogrio-0.12.0.dist-info/licenses/LICENSE +21 -0
- pyogrio-0.12.0.dist-info/top_level.txt +1 -0
pyogrio/raw.py
ADDED
|
@@ -0,0 +1,897 @@
|
|
|
1
|
+
"""Low level functions to read and write OGR data sources."""
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
from io import BytesIO
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from pyogrio._compat import HAS_ARROW_WRITE_API, HAS_PYARROW
|
|
8
|
+
from pyogrio._env import GDALEnv
|
|
9
|
+
from pyogrio.core import detect_write_driver
|
|
10
|
+
from pyogrio.errors import DataSourceError
|
|
11
|
+
from pyogrio.util import (
|
|
12
|
+
_mask_to_wkb,
|
|
13
|
+
_preprocess_options_key_value,
|
|
14
|
+
get_vsi_path_or_buffer,
|
|
15
|
+
vsi_path,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
with GDALEnv():
|
|
19
|
+
from pyogrio._io import ogr_open_arrow, ogr_read, ogr_write, ogr_write_arrow
|
|
20
|
+
from pyogrio._ogr import (
|
|
21
|
+
_get_driver_metadata_item,
|
|
22
|
+
get_gdal_version,
|
|
23
|
+
get_gdal_version_string,
|
|
24
|
+
ogr_driver_supports_vsi,
|
|
25
|
+
ogr_driver_supports_write,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Drivers that cannot mix single-part and multi-part geometry types
# (e.g. Polygon and MultiPolygon) within a single layer.
# NOTE(review): presumably consumed by the write path to promote geometries
# to the multi type — confirm against the writer functions.
DRIVERS_NO_MIXED_SINGLE_MULTI = {
    "FlatGeobuf",
    "GPKG",
}

# Drivers that cannot mix 2D and Z-dimensional geometries within a layer.
DRIVERS_NO_MIXED_DIMENSIONS = {
    "FlatGeobuf",
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def read(
    path_or_buffer,
    /,
    layer=None,
    encoding=None,
    columns=None,
    read_geometry=True,
    force_2d=False,
    skip_features=0,
    max_features=None,
    where=None,
    bbox=None,
    mask=None,
    fids=None,
    sql=None,
    sql_dialect=None,
    return_fids=False,
    datetime_as_string=False,
    **kwargs,
):
    """Read OGR data source into numpy arrays.

    IMPORTANT: non-linear geometry types (e.g., MultiSurface) are converted
    to their linear approximations.

    Parameters
    ----------
    path_or_buffer : pathlib.Path or str, or bytes buffer
        A dataset path or URI, raw buffer, or file-like object with a read
        method.
    layer : int or str, optional (default: first layer)
        Index or name of the layer to read from the data source.
    encoding : str, optional (default: None)
        Encoding used to decode string values from the data source. By
        default the native encoding is detected automatically and values are
        decoded to ``UTF-8``.
    columns : list-like, optional (default: all columns)
        Names of columns to read; must exactly match the names in the data
        source and are returned in source order. Pass an empty list-like to
        read no columns. If combined with ``where``, include the columns
        referenced by the expression, otherwise results are driver dependent
        (empty results or an exception).
    read_geometry : bool, optional (default: True)
        If True, geometry is read into WKB; if False, geometry will be None.
    force_2d : bool, optional (default: False)
        If True, Z values are ignored and 2D geometries are returned.
    skip_features : int, optional (default: 0)
        Number of leading features to skip. Skipping more features than are
        available yields an empty result. This can be expensive for drivers
        without random feature access because all prior features must be
        iterated.
    max_features : int, optional (default: None)
        Number of features to read from the file.
    where : str, optional (default: None)
        Attribute filter for the layer. Use the data source's native SQL
        dialect when it has one (eg. SQLite and GeoPackage: `SQLITE`_,
        PostgreSQL), otherwise the `OGRSQL WHERE`_ syntax. The dialect
        cannot be overruled here; that is only possible via ``sql``.
        Examples: ``"ISO_A3 = 'CAN'"``,
        ``"POP_EST > 10000000 AND POP_EST < 100000000"``
    bbox : tuple of (xmin, ymin, xmax, ymax), optional (default: None)
        Spatial filter box in the dataset CRS. With GEOS available to GDAL,
        only geometries intersecting this box are returned; without GEOS,
        all geometries whose bounding boxes intersect it are returned.
        Cannot be combined with ``mask``.
    mask : Shapely geometry, optional (default: None)
        Spatial filter geometry in the dataset CRS (requires Shapely >= 2.0).
        With GEOS available to GDAL, only geometries intersecting this
        geometry are returned; without GEOS, all geometries whose bounding
        boxes intersect its bounding box are returned. Cannot be combined
        with ``bbox``.
    fids : array-like, optional (default: None)
        Integer feature id (FID) values to select. Cannot be combined with
        ``skip_features``, ``max_features``, ``where``, ``bbox``, or
        ``mask``. The starting index is driver and file specific (typically
        0 for Shapefile, 1 for GeoPackage), as is the performance of reading
        many features by FID.
    sql : str, optional (default: None)
        SQL statement to execute (see ``sql_dialect`` for syntax). Other
        keywords such as ``columns``, ``skip_features``, ``max_features``,
        ``where``, ``bbox``, or ``mask`` are applied after the query, which
        can hurt performance (e.g. spatial indexes may not be used).
        Cannot be combined with ``layer`` or ``fids``.
    sql_dialect : str, optional (default: None)
        SQL dialect the ``sql`` statement is written in:

        - **None**: the data source's native dialect if it has one
          (eg. SQLite and Geopackage: `SQLITE`_, PostgreSQL), otherwise
          `OGRSQL`_.
        - '`OGRSQL`_': usable on any data source, but performance can
          suffer on sources with native SQL support.
        - '`SQLITE`_': usable on any data source with all spatialite_
          functions available; performance can suffer except on Geopackage
          and SQLite, whose native dialect this is.

    return_fids : bool, optional (default: False)
        If True, the FIDs of the features that were read are returned.
    datetime_as_string : bool, optional (default: False)
        If True, datetime dtypes detected by GDAL are returned as a string
        array (from which time zone info can be extracted) instead of a
        datetime64 array.

    **kwargs
        Additional driver-specific dataset open options passed to OGR.
        Invalid options will trigger a warning.

    Returns
    -------
    (dict, fids, geometry, data fields)
        A tuple of: meta information about the data source in a dict, an
        ndarray of FIDs for the features read (or None when ``return_fids``
        is False), an ndarray of geometry objects (or None when the source
        has no geometry or ``read_geometry`` is False), and a tuple of
        ndarrays, one per field in the data layer.

        Meta is: {
            "crs": "<crs>",
            "fields": <ndarray of field names>,
            "dtypes": <ndarray of numpy dtypes corresponding to fields>,
            "ogr_types": <ndarray of OGR types corresponding to fields>,
            "ogr_subtypes": <ndarray of OGR subtypes corresponding to fields>,
            "encoding": "<encoding>",
            "geometry_type": "<geometry type>",
        }

    .. _OGRSQL:

        https://gdal.org/user/ogr_sql_dialect.html#ogr-sql-dialect

    .. _OGRSQL WHERE:

        https://gdal.org/user/ogr_sql_dialect.html#where

    .. _SQLITE:

        https://gdal.org/user/sql_sqlite_dialect.html#sql-sqlite-dialect

    .. _spatialite:

        https://www.gaia-gis.it/gaia-sins/spatialite-sql-latest.html

    """
    # Normalize driver-specific open options before handing off to OGR.
    open_options = _preprocess_options_key_value(kwargs) if kwargs else {}

    # Resolve the input to a VSI path (or in-memory buffer) and delegate all
    # of the actual reading to the Cython layer.
    source = get_vsi_path_or_buffer(path_or_buffer)

    return ogr_read(
        source,
        layer=layer,
        encoding=encoding,
        columns=columns,
        read_geometry=read_geometry,
        force_2d=force_2d,
        skip_features=skip_features,
        max_features=max_features or 0,  # 0 means "no limit" at the OGR level
        where=where,
        bbox=bbox,
        mask=_mask_to_wkb(mask),  # shapely geometry -> WKB for the C layer
        fids=fids,
        sql=sql,
        sql_dialect=sql_dialect,
        return_fids=return_fids,
        dataset_kwargs=open_options,
        datetime_as_string=datetime_as_string,
    )
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def read_arrow(
    path_or_buffer,
    /,
    layer=None,
    encoding=None,
    columns=None,
    read_geometry=True,
    force_2d=False,
    skip_features=0,
    max_features=None,
    where=None,
    bbox=None,
    mask=None,
    fids=None,
    sql=None,
    sql_dialect=None,
    return_fids=False,
    datetime_as_string=False,
    **kwargs,
):
    """Read OGR data source into a pyarrow Table.

    See docstring of `read` for parameters.

    Returns
    -------
    (dict, pyarrow.Table)

        A tuple of meta information about the data source in a dict, and a
        pyarrow Table with the data.

        Meta is: {
            "crs": "<crs>",
            "fields": <ndarray of field names>,
            "dtypes": <ndarray of numpy dtypes corresponding to fields>,
            "ogr_types": <ndarray of OGR types corresponding to fields>,
            "ogr_subtypes": <ndarray of OGR subtypes corresponding to fields>,
            "encoding": "<encoding>",
            "geometry_type": "<geometry_type>",
            "geometry_name": "<name of geometry column in arrow table>",
            "fid_column": "<name of FID column in arrow table>"
        }

    """
    if not HAS_PYARROW:
        raise RuntimeError(
            "pyarrow required to read using 'read_arrow'. You can use 'open_arrow' "
            "to read data with an alternative Arrow implementation"
        )

    from pyarrow import Table

    if skip_features < 0:
        raise ValueError("'skip_features' must be >= 0")

    if max_features is not None and max_features < 0:
        raise ValueError("'max_features' must be >= 0")

    # Never fetch batches larger than the number of requested features.
    batch_size = kwargs.pop("batch_size", 65_536)
    if max_features is not None and max_features < batch_size:
        batch_size = max_features

    # GDAL >= 3.8.0 can skip features itself; for older versions the slicing
    # is done below on the materialized table instead.
    gdal_skip_features = 0
    if get_gdal_version() >= (3, 8, 0):
        gdal_skip_features, skip_features = skip_features, 0

    with open_arrow(
        path_or_buffer,
        layer=layer,
        encoding=encoding,
        columns=columns,
        read_geometry=read_geometry,
        force_2d=force_2d,
        where=where,
        bbox=bbox,
        mask=mask,
        fids=fids,
        sql=sql,
        sql_dialect=sql_dialect,
        return_fids=return_fids,
        skip_features=gdal_skip_features,
        batch_size=batch_size,
        use_pyarrow=True,
        datetime_as_string=datetime_as_string,
        **kwargs,
    ) as source:
        meta, reader = source

        if max_features is not None:
            # Pull batches only until enough features have arrived, then
            # slice out the requested window. combine_chunks releases the
            # original memory that included the surplus features.
            needed = skip_features + max_features
            batches = []
            count = 0
            for batch in reader:
                batches.append(batch)
                count += len(batch)
                if count >= needed:
                    break

            table = (
                Table.from_batches(batches, schema=reader.schema)
                .slice(skip_features, max_features)
                .combine_chunks()
            )

        elif skip_features > 0:
            table = reader.read_all().slice(skip_features).combine_chunks()

        else:
            table = reader.read_all()

        return meta, table
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def open_arrow(
    path_or_buffer,
    /,
    layer=None,
    encoding=None,
    columns=None,
    read_geometry=True,
    force_2d=False,
    skip_features=0,
    max_features=None,
    where=None,
    bbox=None,
    mask=None,
    fids=None,
    sql=None,
    sql_dialect=None,
    return_fids=False,
    batch_size=65_536,
    use_pyarrow=False,
    datetime_as_string=False,
    **kwargs,
):
    """Open OGR data source as a stream of Arrow record batches.

    See docstring of `read` for parameters.

    The returned object is reading from a stream provided by OGR and must not
    be accessed after the OGR dataset has been closed, i.e. after the context
    manager has been closed.

    By default this functions returns a generic stream object implementing
    the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_stream__``
    method), to be consumed by any Arrow implementation supporting that
    protocol. Pass ``use_pyarrow=True`` to directly get the stream as a
    `pyarrow.RecordBatchReader` instead.

    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html

    Other Parameters
    ----------------
    batch_size : int (default: 65_536)
        Maximum number of features to retrieve in a batch.
    use_pyarrow : bool (default: False)
        If True, return a pyarrow RecordBatchReader instead of a generic
        ArrowStream object. In the default case, this stream object needs
        to be passed to another library supporting the Arrow PyCapsule
        Protocol to consume the stream of data.
    datetime_as_string : bool, optional (default: False)
        If True, will return datetime dtypes as detected by GDAL as strings,
        as Arrow doesn't support e.g. mixed time zones.

    Examples
    --------
    >>> from pyogrio.raw import open_arrow
    >>> import pyarrow as pa
    >>> import shapely
    >>>
    >>> with open_arrow(path) as source:
    >>>     meta, stream = source
    >>>     # wrap the arrow stream object in a pyarrow RecordBatchReader
    >>>     reader = pa.RecordBatchReader.from_stream(stream)
    >>>     geom_col = meta["geometry_name"] or "wkb_geometry"
    >>>     for batch in reader:
    >>>         geometries = shapely.from_wkb(batch[geom_col])

    The returned `stream` object needs to be consumed by a library
    implementing the Arrow PyCapsule Protocol; with ``use_pyarrow=True``
    the wrapping shown above is done for you:

    >>> with open_arrow(path, use_pyarrow=True) as source:
    >>>     meta, reader = source
    >>>     geom_col = meta["geometry_name"] or "wkb_geometry"
    >>>     for batch in reader:
    >>>         geometries = shapely.from_wkb(batch[geom_col])

    Returns
    -------
    (dict, pyarrow.RecordBatchReader or ArrowStream)

        A tuple of meta information about the data source in a dict, and a
        data stream object (a generic ArrowStream object, or a pyarrow
        RecordBatchReader if `use_pyarrow` is set to True).

        Meta is: {
            "crs": "<crs>",
            "fields": <ndarray of field names>,
            "dtypes": <ndarray of numpy dtypes corresponding to fields>,
            "ogr_types": <ndarray of OGR types corresponding to fields>,
            "ogr_subtypes": <ndarray of OGR subtypes corresponding to fields>,
            "encoding": "<encoding>",
            "geometry_type": "<geometry_type>",
            "geometry_name": "<name of geometry column in arrow table>",
            "fid_column": "<name of FID column in arrow table>"
        }

    """
    # Normalize driver-specific open options before handing off to OGR.
    open_options = _preprocess_options_key_value(kwargs) if kwargs else {}

    # Resolve the input to a VSI path (or in-memory buffer) and delegate
    # the stream construction to the Cython layer.
    source = get_vsi_path_or_buffer(path_or_buffer)

    return ogr_open_arrow(
        source,
        layer=layer,
        encoding=encoding,
        columns=columns,
        read_geometry=read_geometry,
        force_2d=force_2d,
        skip_features=skip_features,
        max_features=max_features or 0,  # 0 means "no limit" at the OGR level
        where=where,
        bbox=bbox,
        mask=_mask_to_wkb(mask),  # shapely geometry -> WKB for the C layer
        fids=fids,
        sql=sql,
        sql_dialect=sql_dialect,
        return_fids=return_fids,
        dataset_kwargs=open_options,
        batch_size=batch_size,
        use_pyarrow=use_pyarrow,
        datetime_as_string=datetime_as_string,
    )
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def _parse_options_names(xml):
|
|
474
|
+
"""Convert metadata xml to list of names."""
|
|
475
|
+
# Based on Fiona's meta.py
|
|
476
|
+
# (https://github.com/Toblerity/Fiona/blob/91c13ad8424641557a4e5f038f255f9b657b1bc5/fiona/meta.py)
|
|
477
|
+
import xml.etree.ElementTree as ET
|
|
478
|
+
|
|
479
|
+
options = []
|
|
480
|
+
if xml:
|
|
481
|
+
root = ET.fromstring(xml)
|
|
482
|
+
for option in root.iter("Option"):
|
|
483
|
+
# some options explicitly have scope='raster'
|
|
484
|
+
if option.attrib.get("scope", "vector") != "raster":
|
|
485
|
+
options.append(option.attrib["name"])
|
|
486
|
+
|
|
487
|
+
return options
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _validate_metadata(dataset_metadata, layer_metadata, metadata):
|
|
491
|
+
"""Validate the metadata."""
|
|
492
|
+
if metadata is not None:
|
|
493
|
+
if layer_metadata is not None:
|
|
494
|
+
raise ValueError("Cannot pass both metadata and layer_metadata")
|
|
495
|
+
layer_metadata = metadata
|
|
496
|
+
|
|
497
|
+
# validate metadata types
|
|
498
|
+
for meta in [dataset_metadata, layer_metadata]:
|
|
499
|
+
if meta is not None:
|
|
500
|
+
for k, v in meta.items():
|
|
501
|
+
if not isinstance(k, str):
|
|
502
|
+
raise ValueError(f"metadata key {k} must be a string")
|
|
503
|
+
|
|
504
|
+
if not isinstance(v, str):
|
|
505
|
+
raise ValueError(f"metadata value {v} must be a string")
|
|
506
|
+
|
|
507
|
+
return dataset_metadata, layer_metadata
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def _preprocess_options_kwargs(driver, dataset_options, layer_options, kwargs):
    """Preprocess kwargs and split in dataset and layer creation options.

    Explicit ``dataset_options`` / ``layer_options`` are normalized as-is;
    remaining ``kwargs`` are assigned to one of the two groups based on the
    driver's advertised creation-option lists.

    Raises
    ------
    ValueError
        If a kwarg is not a known dataset or layer creation option for the
        driver.
    """
    dataset_kwargs = _preprocess_options_key_value(dataset_options or {})
    layer_kwargs = _preprocess_options_key_value(layer_options or {})

    if not kwargs:
        return dataset_kwargs, layer_kwargs

    extra = _preprocess_options_key_value(kwargs)
    # option names advertised by the driver, used to route each kwarg
    dataset_option_names = _parse_options_names(
        _get_driver_metadata_item(driver, "DMD_CREATIONOPTIONLIST")
    )
    layer_option_names = _parse_options_names(
        _get_driver_metadata_item(driver, "DS_LAYER_CREATIONOPTIONLIST")
    )
    for key, value in extra.items():
        if key in dataset_option_names:
            dataset_kwargs[key] = value
        elif key in layer_option_names:
            layer_kwargs[key] = value
        else:
            raise ValueError(f"unrecognized option '{key}' for driver '{driver}'")

    return dataset_kwargs, layer_kwargs
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def _get_write_path_driver(path, driver, append=False):
|
|
534
|
+
"""Validate and return path and driver.
|
|
535
|
+
|
|
536
|
+
Parameters
|
|
537
|
+
----------
|
|
538
|
+
path : str or io.BytesIO
|
|
539
|
+
path to output file on writeable file system or an io.BytesIO object to
|
|
540
|
+
allow writing to memory. Will raise NotImplementedError if an open file
|
|
541
|
+
handle is passed.
|
|
542
|
+
driver : str, optional (default: None)
|
|
543
|
+
The OGR format driver used to write the vector file. By default attempts
|
|
544
|
+
to infer driver from path. Must be provided to write to a file-like
|
|
545
|
+
object.
|
|
546
|
+
append : bool, optional (default: False)
|
|
547
|
+
True if path and driver is being tested for append support
|
|
548
|
+
|
|
549
|
+
Returns
|
|
550
|
+
-------
|
|
551
|
+
(path, driver)
|
|
552
|
+
|
|
553
|
+
"""
|
|
554
|
+
if isinstance(path, BytesIO):
|
|
555
|
+
if driver is None:
|
|
556
|
+
raise ValueError("driver must be provided to write to in-memory file")
|
|
557
|
+
|
|
558
|
+
# blacklist certain drivers known not to work in current memory implementation
|
|
559
|
+
# because they create multiple files
|
|
560
|
+
if driver in {"ESRI Shapefile", "OpenFileGDB"}:
|
|
561
|
+
raise ValueError(f"writing to in-memory file is not supported for {driver}")
|
|
562
|
+
|
|
563
|
+
# verify that driver supports VSI methods
|
|
564
|
+
if not ogr_driver_supports_vsi(driver):
|
|
565
|
+
raise DataSourceError(
|
|
566
|
+
f"{driver} does not support ability to write in-memory in GDAL "
|
|
567
|
+
f"{get_gdal_version_string()}"
|
|
568
|
+
)
|
|
569
|
+
|
|
570
|
+
if append:
|
|
571
|
+
raise NotImplementedError("append is not supported for in-memory files")
|
|
572
|
+
|
|
573
|
+
elif hasattr(path, "write") and not isinstance(path, Path):
|
|
574
|
+
raise NotImplementedError(
|
|
575
|
+
"writing to an open file handle is not yet supported; instead, write to a "
|
|
576
|
+
"BytesIO instance and then read bytes from that to write to the file handle"
|
|
577
|
+
)
|
|
578
|
+
|
|
579
|
+
else:
|
|
580
|
+
path = vsi_path(path)
|
|
581
|
+
|
|
582
|
+
if driver is None:
|
|
583
|
+
driver = detect_write_driver(path)
|
|
584
|
+
|
|
585
|
+
# verify that driver supports writing
|
|
586
|
+
if not ogr_driver_supports_write(driver):
|
|
587
|
+
raise DataSourceError(
|
|
588
|
+
f"{driver} does not support write functionality in GDAL "
|
|
589
|
+
f"{get_gdal_version_string()}"
|
|
590
|
+
)
|
|
591
|
+
|
|
592
|
+
return path, driver
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def write(
    path,
    geometry,
    field_data,
    fields,
    field_mask=None,
    layer=None,
    driver=None,
    # derived from meta if roundtrip
    geometry_type=None,
    crs=None,
    encoding=None,
    promote_to_multi=None,
    nan_as_null=True,
    append=False,
    dataset_metadata=None,
    layer_metadata=None,
    metadata=None,
    dataset_options=None,
    layer_options=None,
    gdal_tz_offsets=None,
    **kwargs,
):
    """Write geometry and field data to an OGR file format.

    Parameters
    ----------
    path : str or io.BytesIO
        path to output file on writeable file system or an io.BytesIO object to
        allow writing to memory. Will raise NotImplementedError if an open file
        handle is passed; use BytesIO instead.
        NOTE: support for writing to memory is limited to specific drivers.
    geometry : ndarray of WKB encoded geometries or None
        If None, geometries will not be written to output file
    field_data : list-like of shape (num_fields, num_records)
        contains one record per field to be written in same order as fields
    fields : list-like
        contains field names
    field_mask : list-like of ndarrays or None, optional (default: None)
        contains mask arrays indicating null values of the field at the same
        position in the outer list, or None to indicate field does not have
        a mask array
    layer : str, optional (default: None)
        layer name to create. If writing to memory and layer name is not
        provided, its layer name will be set to a UUID4 value.
    driver : string, optional (default: None)
        The OGR format driver used to write the vector file. By default attempts
        to infer driver from path. Must be provided to write to memory.
    geometry_type : str, optional (default: None)
        Possible values are: "Unknown", "Point", "LineString", "Polygon",
        "MultiPoint", "MultiLineString", "MultiPolygon" or "GeometryCollection".

        This parameter does not modify the geometry, but it will try to force
        the layer type of the output file to this value. Use this parameter with
        caution because using a wrong layer geometry type may result in errors
        when writing the file, may be ignored by the driver, or may result in
        invalid files.
    crs : str, optional (default: None)
        WKT-encoded CRS of the geometries to be written.
    encoding : str, optional (default: None)
        If present, will be used as the encoding for writing string values to
        the file. Use with caution, only certain drivers support encodings
        other than UTF-8.
    promote_to_multi : bool, optional (default: None)
        If True, will convert singular geometry types in the data to their
        corresponding multi geometry type for writing. By default, will convert
        mixed singular and multi geometry types to multi geometry types for
        drivers that do not support mixed singular and multi geometry types. If
        False, geometry types will not be promoted, which may result in errors
        or invalid files when attempting to write mixed singular and multi
        geometry types to drivers that do not support such combinations.
    nan_as_null : bool, default True
        For floating point columns (float32 / float64), whether NaN values are
        written as "null" (missing value). Defaults to True because in pandas
        NaNs are typically used as missing value. Note that when set to False,
        behaviour is format specific: some formats don't support NaNs by
        default (e.g. GeoJSON will skip this property) or might treat them as
        null anyway (e.g. GeoPackage).
    append : bool, optional (default: False)
        If True, the data source specified by path already exists, and the
        driver supports appending to an existing data source, will cause the
        data to be appended to the existing records in the data source. Not
        supported for writing to in-memory files.
        NOTE: append support is limited to specific drivers and GDAL versions.
    dataset_metadata : dict, optional (default: None)
        Metadata to be stored at the dataset level in the output file; limited
        to drivers that support writing metadata, such as GPKG, and silently
        ignored otherwise. Keys and values must be strings.
    layer_metadata : dict, optional (default: None)
        Metadata to be stored at the layer level in the output file; limited to
        drivers that support writing metadata, such as GPKG, and silently
        ignored otherwise. Keys and values must be strings.
    metadata : dict, optional (default: None)
        alias of layer_metadata
    dataset_options : dict, optional
        Dataset creation options (format specific) passed to OGR. Specify as
        a key-value dictionary.
    layer_options : dict, optional
        Layer creation options (format specific) passed to OGR. Specify as
        a key-value dictionary.
    gdal_tz_offsets : dict, optional (default: None)
        Used to handle GDAL time zone offsets for each field contained in dict.
    **kwargs
        Additional driver-specific dataset or layer creation options passed to
        OGR. Options that are not recognized for the driver raise a ValueError.

    """
    # remove some unneeded kwargs (e.g. dtypes is included in meta returned by
    # read, and it is convenient to pass meta directly into write for round trip tests)
    kwargs.pop("dtypes", None)
    kwargs.pop("ogr_types", None)
    kwargs.pop("ogr_subtypes", None)

    # validates path / driver combination and resolves the driver if omitted
    path, driver = _get_write_path_driver(path, driver, append=append)

    # resolves the `metadata` alias and checks keys/values are strings
    dataset_metadata, layer_metadata = _validate_metadata(
        dataset_metadata, layer_metadata, metadata
    )

    # NOTE(review): assumes geometry_type is always provided when geometry is;
    # geometry_type=None here would raise AttributeError — confirm callers.
    if geometry is not None and promote_to_multi is None:
        promote_to_multi = (
            geometry_type.startswith("Multi")
            and driver in DRIVERS_NO_MIXED_SINGLE_MULTI
        )

    if geometry is not None and crs is None:
        warnings.warn(
            "'crs' was not provided. The output dataset will not have "
            "projection information defined and may not be usable in other "
            "systems.",
            stacklevel=2,
        )

    # preprocess kwargs and split in dataset and layer creation options
    dataset_kwargs, layer_kwargs = _preprocess_options_kwargs(
        driver, dataset_options, layer_options, kwargs
    )

    ogr_write(
        path,
        layer=layer,
        driver=driver,
        geometry=geometry,
        geometry_type=geometry_type,
        field_data=field_data,
        field_mask=field_mask,
        fields=fields,
        crs=crs,
        encoding=encoding,
        promote_to_multi=promote_to_multi,
        nan_as_null=nan_as_null,
        append=append,
        dataset_metadata=dataset_metadata,
        layer_metadata=layer_metadata,
        dataset_kwargs=dataset_kwargs,
        layer_kwargs=layer_kwargs,
        gdal_tz_offsets=gdal_tz_offsets,
    )
|
|
753
|
+
|
|
754
|
+
|
|
755
|
+
def write_arrow(
    arrow_obj,
    path,
    layer=None,
    driver=None,
    geometry_name=None,
    geometry_type=None,
    crs=None,
    encoding=None,
    append=False,
    dataset_metadata=None,
    layer_metadata=None,
    metadata=None,
    dataset_options=None,
    layer_options=None,
    **kwargs,
):
    """Write an Arrow-compatible data source to an OGR file format.

    .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html

    Parameters
    ----------
    arrow_obj
        The Arrow data to write. This can be any Arrow-compatible tabular data
        object that implements the `Arrow PyCapsule Protocol`_ (i.e. has an
        ``__arrow_c_stream__`` method), for example a pyarrow Table or
        RecordBatchReader.
    path : str or io.BytesIO
        path to output file on writeable file system or an io.BytesIO object to
        allow writing to memory
        NOTE: support for writing to memory is limited to specific drivers.
    layer : str, optional (default: None)
        layer name to create. If writing to memory and layer name is not
        provided, its layer name will be set to a UUID4 value.
    driver : string, optional (default: None)
        The OGR format driver used to write the vector file. By default attempts
        to infer driver from path. Must be provided to write to memory.
    geometry_name : str, optional (default: None)
        The name of the column in the input data that will be written as the
        geometry field. Will be inferred from the input data if the geometry
        column is annotated as an "geoarrow.wkb" or "ogc.wkb" extension type.
        Otherwise needs to be specified explicitly.
    geometry_type : str
        The geometry type of the written layer. Currently, this needs to be
        specified explicitly when creating a new layer with geometries.
        Possible values are: "Unknown", "Point", "LineString", "Polygon",
        "MultiPoint", "MultiLineString", "MultiPolygon" or "GeometryCollection".

        This parameter does not modify the geometry, but it will try to force the layer
        type of the output file to this value. Use this parameter with caution because
        using a wrong layer geometry type may result in errors when writing the
        file, may be ignored by the driver, or may result in invalid files.
    crs : str, optional (default: None)
        WKT-encoded CRS of the geometries to be written.
    encoding : str, optional (default: None)
        Only used for the .dbf file of ESRI Shapefiles. If not specified,
        uses the default locale.
    append : bool, optional (default: False)
        If True, the data source specified by path already exists, and the
        driver supports appending to an existing data source, will cause the
        data to be appended to the existing records in the data source. Not
        supported for writing to in-memory files.
        NOTE: append support is limited to specific drivers and GDAL versions.
    dataset_metadata : dict, optional (default: None)
        Metadata to be stored at the dataset level in the output file; limited
        to drivers that support writing metadata, such as GPKG, and silently
        ignored otherwise. Keys and values must be strings.
    layer_metadata : dict, optional (default: None)
        Metadata to be stored at the layer level in the output file; limited to
        drivers that support writing metadata, such as GPKG, and silently
        ignored otherwise. Keys and values must be strings.
    metadata : dict, optional (default: None)
        alias of layer_metadata
    dataset_options : dict, optional
        Dataset creation options (format specific) passed to OGR. Specify as
        a key-value dictionary.
    layer_options : dict, optional
        Layer creation options (format specific) passed to OGR. Specify as
        a key-value dictionary.
    **kwargs
        Additional driver-specific dataset or layer creation options passed
        to OGR. pyogrio will attempt to automatically pass those keywords
        either as dataset or as layer creation option based on the known
        options for the specific driver. Alternatively, you can use the
        explicit `dataset_options` or `layer_options` keywords to manually
        do this (for example if an option exists as both dataset and layer
        option).

    """
    # Arrow write API requires GDAL >= 3.8 (checked at import time)
    if not HAS_ARROW_WRITE_API:
        raise RuntimeError("GDAL>=3.8 required to write using arrow")

    # any object exposing the Arrow C stream interface is acceptable
    if not hasattr(arrow_obj, "__arrow_c_stream__"):
        raise ValueError(
            "The provided data is not recognized as Arrow data. The object "
            "should implement the Arrow PyCapsule Protocol (i.e. have a "
            "'__arrow_c_stream__' method)."
        )

    # validates path / driver combination and resolves the driver if omitted
    path, driver = _get_write_path_driver(path, driver, append=append)

    # promote_to_multi would require rewriting geometries, which the Arrow
    # write path does not do
    if "promote_to_multi" in kwargs:
        raise ValueError(
            "The 'promote_to_multi' option is not supported when writing using Arrow"
        )

    if geometry_name is not None:
        if geometry_type is None:
            raise ValueError("'geometry_type' keyword is required")
        if crs is None:
            # TODO: does GDAL infer CRS automatically from geometry metadata?
            warnings.warn(
                "'crs' was not provided. The output dataset will not have "
                "projection information defined and may not be usable in other "
                "systems.",
                stacklevel=2,
            )

    # resolves the `metadata` alias and checks keys/values are strings
    dataset_metadata, layer_metadata = _validate_metadata(
        dataset_metadata, layer_metadata, metadata
    )

    # preprocess kwargs and split in dataset and layer creation options
    dataset_kwargs, layer_kwargs = _preprocess_options_kwargs(
        driver, dataset_options, layer_options, kwargs
    )

    ogr_write_arrow(
        path,
        layer=layer,
        driver=driver,
        arrow_obj=arrow_obj,
        geometry_type=geometry_type,
        geometry_name=geometry_name,
        crs=crs,
        encoding=encoding,
        append=append,
        dataset_metadata=dataset_metadata,
        layer_metadata=layer_metadata,
        dataset_kwargs=dataset_kwargs,
        layer_kwargs=layer_kwargs,
    )
|