pyogrio 0.7.2__cp311-cp311-manylinux_2_28_aarch64.whl → 0.9.0__cp311-cp311-manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyogrio might be problematic. Click here for more details.
- pyogrio/__init__.py +4 -0
- pyogrio/_compat.py +6 -1
- pyogrio/_err.cpython-311-aarch64-linux-gnu.so +0 -0
- pyogrio/_err.pyx +7 -3
- pyogrio/_geometry.cpython-311-aarch64-linux-gnu.so +0 -0
- pyogrio/_io.cpython-311-aarch64-linux-gnu.so +0 -0
- pyogrio/_io.pyx +904 -242
- pyogrio/_ogr.cpython-311-aarch64-linux-gnu.so +0 -0
- pyogrio/_ogr.pxd +69 -13
- pyogrio/_ogr.pyx +8 -24
- pyogrio/_version.py +3 -3
- pyogrio/_vsi.cpython-311-aarch64-linux-gnu.so +0 -0
- pyogrio/_vsi.pxd +4 -0
- pyogrio/_vsi.pyx +140 -0
- pyogrio/core.py +43 -44
- pyogrio/gdal_data/GDAL-targets-release.cmake +3 -3
- pyogrio/gdal_data/GDAL-targets.cmake +10 -6
- pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
- pyogrio/gdal_data/gdalinfo_output.schema.json +2 -0
- pyogrio/gdal_data/gdalvrt.xsd +163 -0
- pyogrio/gdal_data/ogrinfo_output.schema.json +12 -1
- pyogrio/gdal_data/vcpkg.spdx.json +26 -26
- pyogrio/gdal_data/vcpkg_abi_info.txt +27 -26
- pyogrio/geopandas.py +140 -34
- pyogrio/proj_data/ITRF2008 +2 -2
- pyogrio/proj_data/proj-config-version.cmake +2 -2
- pyogrio/proj_data/proj-config.cmake +2 -1
- pyogrio/proj_data/proj-targets.cmake +13 -13
- pyogrio/proj_data/proj.db +0 -0
- pyogrio/proj_data/proj4-targets.cmake +13 -13
- pyogrio/proj_data/vcpkg.spdx.json +20 -42
- pyogrio/proj_data/vcpkg_abi_info.txt +14 -15
- pyogrio/raw.py +438 -116
- pyogrio/tests/conftest.py +75 -6
- pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
- pyogrio/tests/test_arrow.py +841 -7
- pyogrio/tests/test_core.py +99 -7
- pyogrio/tests/test_geopandas_io.py +827 -121
- pyogrio/tests/test_path.py +23 -3
- pyogrio/tests/test_raw_io.py +276 -50
- pyogrio/util.py +39 -19
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/METADATA +2 -2
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/RECORD +210 -207
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/WHEEL +1 -1
- pyogrio.libs/{libgdal-cb554135.so.33.3.7.2 → libgdal-6ff0914e.so.34.3.8.5} +0 -0
- pyogrio/tests/win32.py +0 -86
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/LICENSE +0 -0
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/top_level.txt +0 -0
pyogrio/_io.pyx
CHANGED
|
@@ -10,24 +10,27 @@ import locale
|
|
|
10
10
|
import logging
|
|
11
11
|
import math
|
|
12
12
|
import os
|
|
13
|
+
import sys
|
|
13
14
|
import warnings
|
|
14
15
|
|
|
15
16
|
from libc.stdint cimport uint8_t, uintptr_t
|
|
16
17
|
from libc.stdlib cimport malloc, free
|
|
17
18
|
from libc.string cimport strlen
|
|
18
19
|
from libc.math cimport isnan
|
|
20
|
+
from cpython.pycapsule cimport PyCapsule_GetPointer
|
|
19
21
|
|
|
20
22
|
cimport cython
|
|
23
|
+
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
|
|
24
|
+
|
|
21
25
|
import numpy as np
|
|
22
|
-
cimport numpy as np
|
|
23
26
|
|
|
24
27
|
from pyogrio._ogr cimport *
|
|
25
28
|
from pyogrio._err cimport *
|
|
29
|
+
from pyogrio._vsi cimport *
|
|
26
30
|
from pyogrio._err import CPLE_BaseError, CPLE_NotSupportedError, NullPointerError
|
|
27
31
|
from pyogrio._geometry cimport get_geometry_type, get_geometry_type_code
|
|
28
32
|
from pyogrio.errors import CRSError, DataSourceError, DataLayerError, GeometryError, FieldError, FeatureError
|
|
29
33
|
|
|
30
|
-
|
|
31
34
|
log = logging.getLogger(__name__)
|
|
32
35
|
|
|
33
36
|
|
|
@@ -135,7 +138,52 @@ cdef char** dict_to_options(object values):
|
|
|
135
138
|
return options
|
|
136
139
|
|
|
137
140
|
|
|
141
|
+
cdef const char* override_threadlocal_config_option(str key, str value):
|
|
142
|
+
"""Set the CPLSetThreadLocalConfigOption for key=value
|
|
143
|
+
|
|
144
|
+
Parameters
|
|
145
|
+
----------
|
|
146
|
+
key : str
|
|
147
|
+
value : str
|
|
148
|
+
|
|
149
|
+
Returns
|
|
150
|
+
-------
|
|
151
|
+
const char*
|
|
152
|
+
value previously set for key, so that it can be later restored. Caller
|
|
153
|
+
is responsible for freeing this via CPLFree() if not NULL.
|
|
154
|
+
"""
|
|
155
|
+
|
|
156
|
+
key_b = key.encode("UTF-8")
|
|
157
|
+
cdef const char* key_c = key_b
|
|
158
|
+
|
|
159
|
+
value_b = value.encode("UTF-8")
|
|
160
|
+
cdef const char* value_c = value_b
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
cdef const char *prev_value = CPLGetThreadLocalConfigOption(key_c, NULL)
|
|
164
|
+
if prev_value != NULL:
|
|
165
|
+
# strings returned from config options may be replaced via
|
|
166
|
+
# CPLSetConfigOption() below; GDAL instructs us to save a copy
|
|
167
|
+
# in a new string
|
|
168
|
+
prev_value = CPLStrdup(prev_value)
|
|
169
|
+
|
|
170
|
+
CPLSetThreadLocalConfigOption(key_c, value_c)
|
|
171
|
+
|
|
172
|
+
return prev_value
|
|
173
|
+
|
|
174
|
+
|
|
138
175
|
cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL:
|
|
176
|
+
"""Open an existing OGR data source
|
|
177
|
+
|
|
178
|
+
Parameters
|
|
179
|
+
----------
|
|
180
|
+
path_c : char *
|
|
181
|
+
input path, including an in-memory path (/vsimem/...)
|
|
182
|
+
mode : int
|
|
183
|
+
set to 1 to allow updating data source
|
|
184
|
+
options : char **, optional
|
|
185
|
+
dataset open options
|
|
186
|
+
"""
|
|
139
187
|
cdef void* ogr_dataset = NULL
|
|
140
188
|
|
|
141
189
|
# Force linear approximations in all cases
|
|
@@ -163,7 +211,7 @@ cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL:
|
|
|
163
211
|
) from None
|
|
164
212
|
|
|
165
213
|
except CPLE_BaseError as exc:
|
|
166
|
-
if str(exc).endswith("
|
|
214
|
+
if str(exc).endswith("a supported file format."):
|
|
167
215
|
raise DataSourceError(
|
|
168
216
|
f"{str(exc)} It might help to specify the correct driver explicitly by "
|
|
169
217
|
"prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'."
|
|
@@ -171,6 +219,25 @@ cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL:
|
|
|
171
219
|
raise DataSourceError(str(exc)) from None
|
|
172
220
|
|
|
173
221
|
|
|
222
|
+
cdef ogr_close(GDALDatasetH ogr_dataset):
|
|
223
|
+
"""Close the dataset and raise exception if that fails.
|
|
224
|
+
NOTE: some drivers only raise errors on write when calling GDALClose()
|
|
225
|
+
"""
|
|
226
|
+
if ogr_dataset != NULL:
|
|
227
|
+
IF CTE_GDAL_VERSION >= (3, 7, 0):
|
|
228
|
+
if GDALClose(ogr_dataset) != CE_None:
|
|
229
|
+
return exc_check()
|
|
230
|
+
|
|
231
|
+
return
|
|
232
|
+
|
|
233
|
+
ELSE:
|
|
234
|
+
GDALClose(ogr_dataset)
|
|
235
|
+
|
|
236
|
+
# GDAL will set an error if there was an error writing the data source
|
|
237
|
+
# on close
|
|
238
|
+
return exc_check()
|
|
239
|
+
|
|
240
|
+
|
|
174
241
|
cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL:
|
|
175
242
|
"""Open OGR layer by index or name.
|
|
176
243
|
|
|
@@ -462,9 +529,11 @@ cdef get_metadata(GDALMajorObjectH obj):
|
|
|
462
529
|
|
|
463
530
|
|
|
464
531
|
cdef detect_encoding(OGRDataSourceH ogr_dataset, OGRLayerH ogr_layer):
|
|
465
|
-
"""Attempt to detect the encoding
|
|
466
|
-
|
|
467
|
-
If
|
|
532
|
+
"""Attempt to detect the encoding to use to read/write string values.
|
|
533
|
+
|
|
534
|
+
If the layer/dataset supports reading/writing data in UTF-8, returns UTF-8.
|
|
535
|
+
If UTF-8 is not supported and ESRI Shapefile, returns ISO-8859-1
|
|
536
|
+
Otherwise the system locale preferred encoding is returned.
|
|
468
537
|
|
|
469
538
|
Parameters
|
|
470
539
|
----------
|
|
@@ -477,18 +546,53 @@ cdef detect_encoding(OGRDataSourceH ogr_dataset, OGRLayerH ogr_layer):
|
|
|
477
546
|
"""
|
|
478
547
|
|
|
479
548
|
if OGR_L_TestCapability(ogr_layer, OLCStringsAsUTF8):
|
|
480
|
-
|
|
549
|
+
# OGR_L_TestCapability returns True for OLCStringsAsUTF8 if GDAL hides encoding
|
|
550
|
+
# complexities for this layer/driver type. In this case all string attribute
|
|
551
|
+
# values have to be supplied in UTF-8 and values will be returned in UTF-8.
|
|
552
|
+
# The encoding used to read/write under the hood depends on the driver used.
|
|
553
|
+
# For layers/drivers where False is returned, the string values are written and
|
|
554
|
+
# read without recoding. Hence, it is up to you to supply the data in the
|
|
555
|
+
# appropriate encoding. More info:
|
|
556
|
+
# https://gdal.org/development/rfc/rfc23_ogr_unicode.html#oftstring-oftstringlist-fields
|
|
557
|
+
# NOTE: for shapefiles, this always returns False for the layer returned
|
|
558
|
+
# when executing SQL, even when it supports UTF-8 (patched below);
|
|
559
|
+
# this may be fixed by https://github.com/OSGeo/gdal/pull/9649 (GDAL >=3.9.0?)
|
|
560
|
+
return "UTF-8"
|
|
481
561
|
|
|
482
562
|
driver = get_driver(ogr_dataset)
|
|
483
|
-
if driver ==
|
|
484
|
-
|
|
563
|
+
if driver == "ESRI Shapefile":
|
|
564
|
+
# OGR_L_TestCapability returns True for OLCStringsAsUTF8 (above) for
|
|
565
|
+
# shapefiles when a .cpg file is present with a valid encoding, or GDAL
|
|
566
|
+
# auto-detects the encoding from the code page of the .dbf file, or
|
|
567
|
+
# SHAPE_ENCODING config option is set, or ENCODING layer creation option
|
|
568
|
+
# is specified (shapefiles only). Otherwise, we can only assume that
|
|
569
|
+
# shapefiles are in their default encoding of ISO-8859-1 (which may be
|
|
570
|
+
# incorrect and must be overridden by user-provided encoding)
|
|
571
|
+
|
|
572
|
+
# Always use the first layer to test capabilities until detection for
|
|
573
|
+
# SQL results from shapefiles are fixed (above)
|
|
574
|
+
# This block should only be used for unfixed versions of GDAL (<3.9.0?)
|
|
575
|
+
if OGR_L_TestCapability(GDALDatasetGetLayer(ogr_dataset, 0), OLCStringsAsUTF8):
|
|
576
|
+
return "UTF-8"
|
|
577
|
+
|
|
578
|
+
return "ISO-8859-1"
|
|
485
579
|
|
|
486
580
|
if driver == "OSM":
|
|
487
581
|
# always set OSM data to UTF-8
|
|
488
582
|
# per https://help.openstreetmap.org/questions/2172/what-encoding-does-openstreetmap-use
|
|
489
583
|
return "UTF-8"
|
|
490
584
|
|
|
491
|
-
|
|
585
|
+
if driver in ("XLSX", "ODS"):
|
|
586
|
+
# TestCapability for OLCStringsAsUTF8 for XLSX and ODS was False for new files
|
|
587
|
+
# being created for GDAL < 3.8.5. Once these versions of GDAL are no longer
|
|
588
|
+
# supported, this can be removed.
|
|
589
|
+
return "UTF-8"
|
|
590
|
+
|
|
591
|
+
if driver == "GeoJSONSeq":
|
|
592
|
+
# In old gdal versions, OLCStringsAsUTF8 wasn't advertised yet.
|
|
593
|
+
return "UTF-8"
|
|
594
|
+
|
|
595
|
+
return locale.getpreferredencoding()
|
|
492
596
|
|
|
493
597
|
|
|
494
598
|
cdef get_fields(OGRLayerH ogr_layer, str encoding, use_arrow=False):
|
|
@@ -608,7 +712,7 @@ cdef apply_bbox_filter(OGRLayerH ogr_layer, bbox):
|
|
|
608
712
|
Parameters
|
|
609
713
|
----------
|
|
610
714
|
ogr_layer : pointer to open OGR layer
|
|
611
|
-
bbox: list or tuple of xmin, ymin, xmax, ymax
|
|
715
|
+
bbox : list or tuple of xmin, ymin, xmax, ymax
|
|
612
716
|
|
|
613
717
|
Raises
|
|
614
718
|
------
|
|
@@ -629,7 +733,7 @@ cdef apply_geometry_filter(OGRLayerH ogr_layer, wkb):
|
|
|
629
733
|
Parameters
|
|
630
734
|
----------
|
|
631
735
|
ogr_layer : pointer to open OGR layer
|
|
632
|
-
wkb: WKB encoding of geometry
|
|
736
|
+
wkb : WKB encoding of geometry
|
|
633
737
|
"""
|
|
634
738
|
|
|
635
739
|
cdef OGRGeometryH ogr_geometry = NULL
|
|
@@ -783,7 +887,7 @@ cdef process_fields(
|
|
|
783
887
|
data[i] = bin_value[:ret_length]
|
|
784
888
|
|
|
785
889
|
elif field_type == OFTDateTime or field_type == OFTDate:
|
|
786
|
-
|
|
890
|
+
|
|
787
891
|
if datetime_as_string:
|
|
788
892
|
# defer datetime parsing to user/ pandas layer
|
|
789
893
|
# Update to OGR_F_GetFieldAsISO8601DateTime when GDAL 3.7+ only
|
|
@@ -851,7 +955,7 @@ cdef get_features(
|
|
|
851
955
|
|
|
852
956
|
field_data = [
|
|
853
957
|
np.empty(shape=(num_features, ),
|
|
854
|
-
dtype = ("object" if datetime_as_string and
|
|
958
|
+
dtype = ("object" if datetime_as_string and
|
|
855
959
|
fields[field_index,3].startswith("datetime") else fields[field_index,3])
|
|
856
960
|
) for field_index in range(n_fields)
|
|
857
961
|
]
|
|
@@ -950,8 +1054,8 @@ cdef get_features_by_fid(
|
|
|
950
1054
|
field_ogr_types = fields[:,1]
|
|
951
1055
|
field_data = [
|
|
952
1056
|
np.empty(shape=(count, ),
|
|
953
|
-
dtype=("object" if datetime_as_string and fields[field_index,3].startswith("datetime")
|
|
954
|
-
else fields[field_index,3]))
|
|
1057
|
+
dtype=("object" if datetime_as_string and fields[field_index,3].startswith("datetime")
|
|
1058
|
+
else fields[field_index,3]))
|
|
955
1059
|
for field_index in range(n_fields)
|
|
956
1060
|
]
|
|
957
1061
|
|
|
@@ -1060,7 +1164,7 @@ cdef get_bounds(
|
|
|
1060
1164
|
|
|
1061
1165
|
|
|
1062
1166
|
def ogr_read(
|
|
1063
|
-
|
|
1167
|
+
object path_or_buffer,
|
|
1064
1168
|
object dataset_kwargs,
|
|
1065
1169
|
object layer=None,
|
|
1066
1170
|
object encoding=None,
|
|
@@ -1080,6 +1184,7 @@ def ogr_read(
|
|
|
1080
1184
|
):
|
|
1081
1185
|
|
|
1082
1186
|
cdef int err = 0
|
|
1187
|
+
cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
|
|
1083
1188
|
cdef const char *path_c = NULL
|
|
1084
1189
|
cdef char **dataset_options = NULL
|
|
1085
1190
|
cdef const char *where_c = NULL
|
|
@@ -1089,9 +1194,8 @@ def ogr_read(
|
|
|
1089
1194
|
cdef OGRLayerH ogr_layer = NULL
|
|
1090
1195
|
cdef int feature_count = 0
|
|
1091
1196
|
cdef double xmin, ymin, xmax, ymax
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
path_c = path_b
|
|
1197
|
+
cdef const char *prev_shape_encoding = NULL
|
|
1198
|
+
cdef bint override_shape_encoding = False
|
|
1095
1199
|
|
|
1096
1200
|
if fids is not None:
|
|
1097
1201
|
if where is not None or bbox is not None or mask is not None or sql is not None or skip_features or max_features:
|
|
@@ -1120,13 +1224,23 @@ def ogr_read(
|
|
|
1120
1224
|
raise ValueError("'max_features' must be >= 0")
|
|
1121
1225
|
|
|
1122
1226
|
try:
|
|
1227
|
+
path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer
|
|
1228
|
+
|
|
1229
|
+
if encoding:
|
|
1230
|
+
# for shapefiles, SHAPE_ENCODING must be set before opening the file
|
|
1231
|
+
# to prevent automatic decoding to UTF-8 by GDAL, so we save previous
|
|
1232
|
+
# SHAPE_ENCODING so that it can be restored later
|
|
1233
|
+
# (we do this for all data sources where encoding is set because
|
|
1234
|
+
# we don't know the driver until after it is opened, which is too late)
|
|
1235
|
+
override_shape_encoding = True
|
|
1236
|
+
prev_shape_encoding = override_threadlocal_config_option("SHAPE_ENCODING", encoding)
|
|
1237
|
+
|
|
1123
1238
|
dataset_options = dict_to_options(dataset_kwargs)
|
|
1124
|
-
ogr_dataset = ogr_open(
|
|
1239
|
+
ogr_dataset = ogr_open(path.encode('UTF-8'), 0, dataset_options)
|
|
1125
1240
|
|
|
1126
1241
|
if sql is None:
|
|
1127
|
-
# layer defaults to index 0
|
|
1128
1242
|
if layer is None:
|
|
1129
|
-
layer =
|
|
1243
|
+
layer = get_default_layer(ogr_dataset)
|
|
1130
1244
|
ogr_layer = get_ogr_layer(ogr_dataset, layer)
|
|
1131
1245
|
else:
|
|
1132
1246
|
ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect)
|
|
@@ -1135,23 +1249,31 @@ def ogr_read(
|
|
|
1135
1249
|
|
|
1136
1250
|
# Encoding is derived from the user, from the dataset capabilities / type,
|
|
1137
1251
|
# or from the system locale
|
|
1138
|
-
encoding
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1252
|
+
if encoding:
|
|
1253
|
+
if get_driver(ogr_dataset) == "ESRI Shapefile":
|
|
1254
|
+
# NOTE: SHAPE_ENCODING is a configuration option whereas ENCODING is the dataset open option
|
|
1255
|
+
if "ENCODING" in dataset_kwargs:
|
|
1256
|
+
raise ValueError('cannot provide both encoding parameter and "ENCODING" option; use encoding parameter to specify correct encoding for data source')
|
|
1257
|
+
|
|
1258
|
+
# Because SHAPE_ENCODING is set above, GDAL will automatically
|
|
1259
|
+
# decode shapefiles to UTF-8; ignore any encoding set by user
|
|
1260
|
+
encoding = "UTF-8"
|
|
1261
|
+
|
|
1262
|
+
else:
|
|
1263
|
+
encoding = detect_encoding(ogr_dataset, ogr_layer)
|
|
1143
1264
|
|
|
1144
1265
|
fields = get_fields(ogr_layer, encoding)
|
|
1145
1266
|
|
|
1146
1267
|
ignored_fields = []
|
|
1147
1268
|
if columns is not None:
|
|
1269
|
+
# identify ignored fields first
|
|
1270
|
+
ignored_fields = list(set(fields[:,2]) - set(columns))
|
|
1271
|
+
|
|
1148
1272
|
# Fields are matched exactly by name, duplicates are dropped.
|
|
1149
1273
|
# Find index of each field into fields
|
|
1150
1274
|
idx = np.intersect1d(fields[:,2], columns, return_indices=True)[1]
|
|
1151
1275
|
fields = fields[idx, :]
|
|
1152
1276
|
|
|
1153
|
-
ignored_fields = list(set(fields[:,2]) - set(columns))
|
|
1154
|
-
|
|
1155
1277
|
if not read_geometry:
|
|
1156
1278
|
ignored_fields.append("OGR_GEOMETRY")
|
|
1157
1279
|
|
|
@@ -1232,6 +1354,17 @@ def ogr_read(
|
|
|
1232
1354
|
GDALClose(ogr_dataset)
|
|
1233
1355
|
ogr_dataset = NULL
|
|
1234
1356
|
|
|
1357
|
+
# reset SHAPE_ENCODING config parameter if temporarily set above
|
|
1358
|
+
if override_shape_encoding:
|
|
1359
|
+
CPLSetThreadLocalConfigOption("SHAPE_ENCODING", prev_shape_encoding)
|
|
1360
|
+
|
|
1361
|
+
if prev_shape_encoding != NULL:
|
|
1362
|
+
CPLFree(<void*>prev_shape_encoding)
|
|
1363
|
+
prev_shape_encoding = NULL
|
|
1364
|
+
|
|
1365
|
+
if is_vsimem:
|
|
1366
|
+
delete_vsimem_file(path)
|
|
1367
|
+
|
|
1235
1368
|
return (
|
|
1236
1369
|
meta,
|
|
1237
1370
|
fid_data,
|
|
@@ -1239,9 +1372,38 @@ def ogr_read(
|
|
|
1239
1372
|
field_data
|
|
1240
1373
|
)
|
|
1241
1374
|
|
|
1375
|
+
|
|
1376
|
+
cdef void pycapsule_array_stream_deleter(object stream_capsule) noexcept:
|
|
1377
|
+
cdef ArrowArrayStream* stream = <ArrowArrayStream*>PyCapsule_GetPointer(
|
|
1378
|
+
stream_capsule, 'arrow_array_stream'
|
|
1379
|
+
)
|
|
1380
|
+
# Do not invoke the deleter on a used/moved capsule
|
|
1381
|
+
if stream.release != NULL:
|
|
1382
|
+
stream.release(stream)
|
|
1383
|
+
|
|
1384
|
+
free(stream)
|
|
1385
|
+
|
|
1386
|
+
|
|
1387
|
+
cdef object alloc_c_stream(ArrowArrayStream** c_stream):
|
|
1388
|
+
c_stream[0] = <ArrowArrayStream*> malloc(sizeof(ArrowArrayStream))
|
|
1389
|
+
# Ensure the capsule destructor doesn't call a random release pointer
|
|
1390
|
+
c_stream[0].release = NULL
|
|
1391
|
+
return PyCapsule_New(c_stream[0], 'arrow_array_stream', &pycapsule_array_stream_deleter)
|
|
1392
|
+
|
|
1393
|
+
|
|
1394
|
+
class _ArrowStream:
|
|
1395
|
+
def __init__(self, capsule):
|
|
1396
|
+
self._capsule = capsule
|
|
1397
|
+
|
|
1398
|
+
def __arrow_c_stream__(self, requested_schema=None):
|
|
1399
|
+
if requested_schema is not None:
|
|
1400
|
+
raise NotImplementedError("requested_schema is not supported")
|
|
1401
|
+
return self._capsule
|
|
1402
|
+
|
|
1403
|
+
|
|
1242
1404
|
@contextlib.contextmanager
|
|
1243
1405
|
def ogr_open_arrow(
|
|
1244
|
-
|
|
1406
|
+
object path_or_buffer,
|
|
1245
1407
|
dataset_kwargs,
|
|
1246
1408
|
object layer=None,
|
|
1247
1409
|
object encoding=None,
|
|
@@ -1257,31 +1419,38 @@ def ogr_open_arrow(
|
|
|
1257
1419
|
str sql=None,
|
|
1258
1420
|
str sql_dialect=None,
|
|
1259
1421
|
int return_fids=False,
|
|
1260
|
-
int batch_size=0
|
|
1422
|
+
int batch_size=0,
|
|
1423
|
+
use_pyarrow=False,
|
|
1424
|
+
):
|
|
1261
1425
|
|
|
1262
1426
|
cdef int err = 0
|
|
1427
|
+
cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
|
|
1263
1428
|
cdef const char *path_c = NULL
|
|
1264
1429
|
cdef char **dataset_options = NULL
|
|
1265
1430
|
cdef const char *where_c = NULL
|
|
1266
1431
|
cdef OGRDataSourceH ogr_dataset = NULL
|
|
1267
1432
|
cdef OGRLayerH ogr_layer = NULL
|
|
1433
|
+
cdef void *ogr_driver = NULL
|
|
1268
1434
|
cdef char **fields_c = NULL
|
|
1269
1435
|
cdef const char *field_c = NULL
|
|
1270
1436
|
cdef char **options = NULL
|
|
1271
|
-
cdef
|
|
1437
|
+
cdef const char *prev_shape_encoding = NULL
|
|
1438
|
+
cdef bint override_shape_encoding = False
|
|
1439
|
+
cdef ArrowArrayStream* stream
|
|
1272
1440
|
cdef ArrowSchema schema
|
|
1273
1441
|
|
|
1274
1442
|
IF CTE_GDAL_VERSION < (3, 6, 0):
|
|
1275
1443
|
raise RuntimeError("Need GDAL>=3.6 for Arrow support")
|
|
1276
1444
|
|
|
1277
|
-
path_b = path.encode('utf-8')
|
|
1278
|
-
path_c = path_b
|
|
1279
|
-
|
|
1280
1445
|
if force_2d:
|
|
1281
1446
|
raise ValueError("forcing 2D is not supported for Arrow")
|
|
1282
1447
|
|
|
1283
1448
|
if fids is not None:
|
|
1284
|
-
|
|
1449
|
+
if where is not None or bbox is not None or mask is not None or sql is not None or skip_features or max_features:
|
|
1450
|
+
raise ValueError(
|
|
1451
|
+
"cannot set both 'fids' and any of 'where', 'bbox', 'mask', "
|
|
1452
|
+
"'sql', 'skip_features', or 'max_features'"
|
|
1453
|
+
)
|
|
1285
1454
|
|
|
1286
1455
|
IF CTE_GDAL_VERSION < (3, 8, 0):
|
|
1287
1456
|
if skip_features:
|
|
@@ -1311,13 +1480,18 @@ def ogr_open_arrow(
|
|
|
1311
1480
|
|
|
1312
1481
|
reader = None
|
|
1313
1482
|
try:
|
|
1483
|
+
path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer
|
|
1484
|
+
|
|
1485
|
+
if encoding:
|
|
1486
|
+
override_shape_encoding = True
|
|
1487
|
+
prev_shape_encoding = override_threadlocal_config_option("SHAPE_ENCODING", encoding)
|
|
1488
|
+
|
|
1314
1489
|
dataset_options = dict_to_options(dataset_kwargs)
|
|
1315
|
-
ogr_dataset = ogr_open(
|
|
1490
|
+
ogr_dataset = ogr_open(path.encode('UTF-8'), 0, dataset_options)
|
|
1316
1491
|
|
|
1317
1492
|
if sql is None:
|
|
1318
|
-
# layer defaults to index 0
|
|
1319
1493
|
if layer is None:
|
|
1320
|
-
layer =
|
|
1494
|
+
layer = get_default_layer(ogr_dataset)
|
|
1321
1495
|
ogr_layer = get_ogr_layer(ogr_dataset, layer)
|
|
1322
1496
|
else:
|
|
1323
1497
|
ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect)
|
|
@@ -1326,11 +1500,18 @@ def ogr_open_arrow(
|
|
|
1326
1500
|
|
|
1327
1501
|
# Encoding is derived from the user, from the dataset capabilities / type,
|
|
1328
1502
|
# or from the system locale
|
|
1329
|
-
encoding
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1503
|
+
if encoding:
|
|
1504
|
+
if get_driver(ogr_dataset) == "ESRI Shapefile":
|
|
1505
|
+
if "ENCODING" in dataset_kwargs:
|
|
1506
|
+
raise ValueError('cannot provide both encoding parameter and "ENCODING" option; use encoding parameter to specify correct encoding for data source')
|
|
1507
|
+
|
|
1508
|
+
encoding = "UTF-8"
|
|
1509
|
+
|
|
1510
|
+
elif encoding.replace('-','').upper() != 'UTF8':
|
|
1511
|
+
raise ValueError("non-UTF-8 encoding is not supported for Arrow; use the non-Arrow interface instead")
|
|
1512
|
+
|
|
1513
|
+
else:
|
|
1514
|
+
encoding = detect_encoding(ogr_dataset, ogr_layer)
|
|
1334
1515
|
|
|
1335
1516
|
fields = get_fields(ogr_layer, encoding, use_arrow=True)
|
|
1336
1517
|
|
|
@@ -1341,19 +1522,64 @@ def ogr_open_arrow(
|
|
|
1341
1522
|
if not read_geometry:
|
|
1342
1523
|
ignored_fields.append("OGR_GEOMETRY")
|
|
1343
1524
|
|
|
1525
|
+
# raise error if schema has bool values for FGB / GPKG and GDAL <3.8.3
|
|
1526
|
+
# due to https://github.com/OSGeo/gdal/issues/8998
|
|
1527
|
+
IF CTE_GDAL_VERSION < (3, 8, 3):
|
|
1528
|
+
|
|
1529
|
+
driver = get_driver(ogr_dataset)
|
|
1530
|
+
if driver in {'FlatGeobuf', 'GPKG'}:
|
|
1531
|
+
ignored = set(ignored_fields)
|
|
1532
|
+
for f in fields:
|
|
1533
|
+
if f[2] not in ignored and f[3] == 'bool':
|
|
1534
|
+
raise RuntimeError(
|
|
1535
|
+
"GDAL < 3.8.3 does not correctly read boolean data values using the "
|
|
1536
|
+
"Arrow API. Do not use read_arrow() / use_arrow=True for this dataset."
|
|
1537
|
+
)
|
|
1538
|
+
|
|
1344
1539
|
geometry_type = get_geometry_type(ogr_layer)
|
|
1345
1540
|
|
|
1346
1541
|
geometry_name = get_string(OGR_L_GetGeometryColumn(ogr_layer))
|
|
1347
1542
|
|
|
1348
1543
|
fid_column = get_string(OGR_L_GetFIDColumn(ogr_layer))
|
|
1544
|
+
fid_column_where = fid_column
|
|
1349
1545
|
# OGR_L_GetFIDColumn returns the column name if it is a custom column,
|
|
1350
|
-
# or "" if not. For arrow, the default column name
|
|
1546
|
+
# or "" if not. For arrow, the default column name used to return the FID data
|
|
1547
|
+
# read is "OGC_FID". When accessing the underlying datasource like when using a
|
|
1548
|
+
# where clause, the default column name is "FID".
|
|
1351
1549
|
if fid_column == "":
|
|
1352
1550
|
fid_column = "OGC_FID"
|
|
1551
|
+
fid_column_where = "FID"
|
|
1552
|
+
|
|
1553
|
+
# Use fids list to create a where clause, as arrow doesn't support direct fid
|
|
1554
|
+
# filtering.
|
|
1555
|
+
if fids is not None:
|
|
1556
|
+
IF CTE_GDAL_VERSION < (3, 8, 0):
|
|
1557
|
+
driver = get_driver(ogr_dataset)
|
|
1558
|
+
if driver not in {"GPKG", "GeoJSON"}:
|
|
1559
|
+
warnings.warn(
|
|
1560
|
+
"Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow "
|
|
1561
|
+
"for some drivers. Upgrading GDAL or using 'use_arrow=False' "
|
|
1562
|
+
"can avoid this.",
|
|
1563
|
+
stacklevel=2,
|
|
1564
|
+
)
|
|
1565
|
+
|
|
1566
|
+
fids_str = ",".join([str(fid) for fid in fids])
|
|
1567
|
+
where = f"{fid_column_where} IN ({fids_str})"
|
|
1353
1568
|
|
|
1354
1569
|
# Apply the attribute filter
|
|
1355
1570
|
if where is not None and where != "":
|
|
1356
|
-
|
|
1571
|
+
try:
|
|
1572
|
+
apply_where_filter(ogr_layer, where)
|
|
1573
|
+
except ValueError as ex:
|
|
1574
|
+
if fids is not None and str(ex).startswith("Invalid SQL query"):
|
|
1575
|
+
# If fids is not None, the where being applied is the one formatted
|
|
1576
|
+
# above.
|
|
1577
|
+
raise ValueError(
|
|
1578
|
+
f"error applying filter for {len(fids)} fids; max. number for "
|
|
1579
|
+
f"drivers with default SQL dialect 'OGRSQL' is 4997"
|
|
1580
|
+
) from ex
|
|
1581
|
+
|
|
1582
|
+
raise
|
|
1357
1583
|
|
|
1358
1584
|
# Apply the spatial filter
|
|
1359
1585
|
if bbox is not None:
|
|
@@ -1381,22 +1607,34 @@ def ogr_open_arrow(
|
|
|
1381
1607
|
str(batch_size).encode('UTF-8')
|
|
1382
1608
|
)
|
|
1383
1609
|
|
|
1610
|
+
# Default to geoarrow metadata encoding
|
|
1611
|
+
IF CTE_GDAL_VERSION >= (3, 8, 0):
|
|
1612
|
+
options = CSLSetNameValue(
|
|
1613
|
+
options,
|
|
1614
|
+
"GEOMETRY_METADATA_ENCODING",
|
|
1615
|
+
"GEOARROW".encode('UTF-8')
|
|
1616
|
+
)
|
|
1617
|
+
|
|
1384
1618
|
# make sure layer is read from beginning
|
|
1385
1619
|
OGR_L_ResetReading(ogr_layer)
|
|
1386
1620
|
|
|
1387
|
-
|
|
1388
|
-
|
|
1621
|
+
# allocate the stream struct and wrap in capsule to ensure clean-up on error
|
|
1622
|
+
capsule = alloc_c_stream(&stream)
|
|
1389
1623
|
|
|
1390
|
-
|
|
1624
|
+
if not OGR_L_GetArrowStream(ogr_layer, stream, options):
|
|
1625
|
+
raise RuntimeError("Failed to open ArrowArrayStream from Layer")
|
|
1391
1626
|
|
|
1392
1627
|
if skip_features:
|
|
1393
1628
|
# only supported for GDAL >= 3.8.0; have to do this after getting
|
|
1394
1629
|
# the Arrow stream
|
|
1395
1630
|
OGR_L_SetNextByIndex(ogr_layer, skip_features)
|
|
1396
1631
|
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1632
|
+
if use_pyarrow:
|
|
1633
|
+
import pyarrow as pa
|
|
1634
|
+
|
|
1635
|
+
reader = pa.RecordBatchStreamReader._import_from_c(<uintptr_t> stream)
|
|
1636
|
+
else:
|
|
1637
|
+
reader = _ArrowStream(capsule)
|
|
1400
1638
|
|
|
1401
1639
|
meta = {
|
|
1402
1640
|
'crs': crs,
|
|
@@ -1407,13 +1645,16 @@ def ogr_open_arrow(
|
|
|
1407
1645
|
'fid_column': fid_column,
|
|
1408
1646
|
}
|
|
1409
1647
|
|
|
1648
|
+
# stream has to be consumed before the Dataset is closed
|
|
1410
1649
|
yield meta, reader
|
|
1411
1650
|
|
|
1412
1651
|
finally:
|
|
1413
|
-
if reader is not None:
|
|
1652
|
+
if use_pyarrow and reader is not None:
|
|
1414
1653
|
# Mark reader as closed to prevent reading batches
|
|
1415
1654
|
reader.close()
|
|
1416
1655
|
|
|
1656
|
+
# `stream` will be freed through `capsule` destructor
|
|
1657
|
+
|
|
1417
1658
|
CSLDestroy(options)
|
|
1418
1659
|
if fields_c != NULL:
|
|
1419
1660
|
CSLDestroy(fields_c)
|
|
@@ -1430,8 +1671,20 @@ def ogr_open_arrow(
|
|
|
1430
1671
|
GDALClose(ogr_dataset)
|
|
1431
1672
|
ogr_dataset = NULL
|
|
1432
1673
|
|
|
1674
|
+
# reset SHAPE_ENCODING config parameter if temporarily set above
|
|
1675
|
+
if override_shape_encoding:
|
|
1676
|
+
CPLSetThreadLocalConfigOption("SHAPE_ENCODING", prev_shape_encoding)
|
|
1677
|
+
|
|
1678
|
+
if prev_shape_encoding != NULL:
|
|
1679
|
+
CPLFree(<void*>prev_shape_encoding)
|
|
1680
|
+
prev_shape_encoding = NULL
|
|
1681
|
+
|
|
1682
|
+
if is_vsimem:
|
|
1683
|
+
delete_vsimem_file(path)
|
|
1684
|
+
|
|
1685
|
+
|
|
1433
1686
|
def ogr_read_bounds(
|
|
1434
|
-
|
|
1687
|
+
object path_or_buffer,
|
|
1435
1688
|
object layer=None,
|
|
1436
1689
|
object encoding=None,
|
|
1437
1690
|
int read_geometry=True,
|
|
@@ -1444,6 +1697,7 @@ def ogr_read_bounds(
|
|
|
1444
1697
|
object mask=None):
|
|
1445
1698
|
|
|
1446
1699
|
cdef int err = 0
|
|
1700
|
+
cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
|
|
1447
1701
|
cdef const char *path_c = NULL
|
|
1448
1702
|
cdef const char *where_c = NULL
|
|
1449
1703
|
cdef OGRDataSourceH ogr_dataset = NULL
|
|
@@ -1460,77 +1714,91 @@ def ogr_read_bounds(
|
|
|
1460
1714
|
if max_features < 0:
|
|
1461
1715
|
raise ValueError("'max_features' must be >= 0")
|
|
1462
1716
|
|
|
1463
|
-
|
|
1464
|
-
|
|
1717
|
+
try:
|
|
1718
|
+
path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer
|
|
1719
|
+
ogr_dataset = ogr_open(path.encode('UTF-8'), 0, NULL)
|
|
1465
1720
|
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
layer = 0
|
|
1721
|
+
if layer is None:
|
|
1722
|
+
layer = get_default_layer(ogr_dataset)
|
|
1469
1723
|
|
|
1470
|
-
|
|
1471
|
-
ogr_layer = get_ogr_layer(ogr_dataset, layer)
|
|
1724
|
+
ogr_layer = get_ogr_layer(ogr_dataset, layer)
|
|
1472
1725
|
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1726
|
+
# Apply the attribute filter
|
|
1727
|
+
if where is not None and where != "":
|
|
1728
|
+
apply_where_filter(ogr_layer, where)
|
|
1729
|
+
|
|
1730
|
+
# Apply the spatial filter
|
|
1731
|
+
if bbox is not None:
|
|
1732
|
+
apply_bbox_filter(ogr_layer, bbox)
|
|
1733
|
+
|
|
1734
|
+
elif mask is not None:
|
|
1735
|
+
apply_geometry_filter(ogr_layer, mask)
|
|
1476
1736
|
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
apply_bbox_filter(ogr_layer, bbox)
|
|
1737
|
+
# Limit feature range to available range
|
|
1738
|
+
skip_features, num_features = validate_feature_range(ogr_layer, skip_features, max_features)
|
|
1480
1739
|
|
|
1481
|
-
|
|
1482
|
-
apply_geometry_filter(ogr_layer, mask)
|
|
1740
|
+
bounds = get_bounds(ogr_layer, skip_features, num_features)
|
|
1483
1741
|
|
|
1484
|
-
|
|
1485
|
-
|
|
1742
|
+
finally:
|
|
1743
|
+
if ogr_dataset != NULL:
|
|
1744
|
+
GDALClose(ogr_dataset)
|
|
1745
|
+
ogr_dataset = NULL
|
|
1486
1746
|
|
|
1487
|
-
|
|
1747
|
+
if is_vsimem:
|
|
1748
|
+
delete_vsimem_file(path)
|
|
1749
|
+
|
|
1750
|
+
return bounds
|
|
1488
1751
|
|
|
1489
1752
|
|
|
1490
1753
|
def ogr_read_info(
|
|
1491
|
-
|
|
1754
|
+
object path_or_buffer,
|
|
1492
1755
|
dataset_kwargs,
|
|
1493
1756
|
object layer=None,
|
|
1494
1757
|
object encoding=None,
|
|
1495
1758
|
int force_feature_count=False,
|
|
1496
1759
|
int force_total_bounds=False):
|
|
1497
1760
|
|
|
1761
|
+
cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
|
|
1498
1762
|
cdef const char *path_c = NULL
|
|
1499
1763
|
cdef char **dataset_options = NULL
|
|
1500
1764
|
cdef OGRDataSourceH ogr_dataset = NULL
|
|
1501
1765
|
cdef OGRLayerH ogr_layer = NULL
|
|
1766
|
+
cdef const char *prev_shape_encoding = NULL
|
|
1767
|
+
cdef bint override_shape_encoding = False
|
|
1502
1768
|
|
|
1503
|
-
|
|
1504
|
-
|
|
1769
|
+
try:
|
|
1770
|
+
path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer
|
|
1505
1771
|
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1772
|
+
if encoding:
|
|
1773
|
+
override_shape_encoding = True
|
|
1774
|
+
prev_shape_encoding = override_threadlocal_config_option("SHAPE_ENCODING", encoding)
|
|
1509
1775
|
|
|
1510
|
-
try:
|
|
1511
1776
|
dataset_options = dict_to_options(dataset_kwargs)
|
|
1512
|
-
ogr_dataset = ogr_open(
|
|
1777
|
+
ogr_dataset = ogr_open(path.encode('UTF-8'), 0, dataset_options)
|
|
1778
|
+
|
|
1779
|
+
if layer is None:
|
|
1780
|
+
layer = get_default_layer(ogr_dataset)
|
|
1513
1781
|
ogr_layer = get_ogr_layer(ogr_dataset, layer)
|
|
1514
1782
|
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
encoding
|
|
1519
|
-
or detect_encoding(ogr_dataset, ogr_layer)
|
|
1520
|
-
or locale.getpreferredencoding()
|
|
1521
|
-
)
|
|
1783
|
+
if encoding and get_driver(ogr_dataset) == "ESRI Shapefile":
|
|
1784
|
+
encoding = "UTF-8"
|
|
1785
|
+
else:
|
|
1786
|
+
encoding = encoding or detect_encoding(ogr_dataset, ogr_layer)
|
|
1522
1787
|
|
|
1523
1788
|
fields = get_fields(ogr_layer, encoding)
|
|
1524
1789
|
|
|
1525
1790
|
meta = {
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1791
|
+
"layer_name": get_string(OGR_L_GetName(ogr_layer)),
|
|
1792
|
+
"crs": get_crs(ogr_layer),
|
|
1793
|
+
"encoding": encoding,
|
|
1794
|
+
"fields": fields[:,2], # return only names
|
|
1795
|
+
"dtypes": fields[:,3],
|
|
1796
|
+
"fid_column": get_string(OGR_L_GetFIDColumn(ogr_layer)),
|
|
1797
|
+
"geometry_name": get_string(OGR_L_GetGeometryColumn(ogr_layer)),
|
|
1798
|
+
"geometry_type": get_geometry_type(ogr_layer),
|
|
1799
|
+
"features": get_feature_count(ogr_layer, force_feature_count),
|
|
1800
|
+
"total_bounds": get_total_bounds(ogr_layer, force_total_bounds),
|
|
1801
|
+
"driver": get_driver(ogr_dataset),
|
|
1534
1802
|
"capabilities": {
|
|
1535
1803
|
"random_read": OGR_L_TestCapability(ogr_layer, OLCRandomRead) == 1,
|
|
1536
1804
|
"fast_set_next_by_index": OGR_L_TestCapability(ogr_layer, OLCFastSetNextByIndex) == 1,
|
|
@@ -1538,8 +1806,8 @@ def ogr_read_info(
|
|
|
1538
1806
|
"fast_feature_count": OGR_L_TestCapability(ogr_layer, OLCFastFeatureCount) == 1,
|
|
1539
1807
|
"fast_total_bounds": OGR_L_TestCapability(ogr_layer, OLCFastGetExtent) == 1,
|
|
1540
1808
|
},
|
|
1541
|
-
|
|
1542
|
-
|
|
1809
|
+
"layer_metadata": get_metadata(ogr_layer),
|
|
1810
|
+
"dataset_metadata": get_metadata(ogr_dataset),
|
|
1543
1811
|
}
|
|
1544
1812
|
|
|
1545
1813
|
finally:
|
|
@@ -1551,19 +1819,88 @@ def ogr_read_info(
|
|
|
1551
1819
|
GDALClose(ogr_dataset)
|
|
1552
1820
|
ogr_dataset = NULL
|
|
1553
1821
|
|
|
1822
|
+
# reset SHAPE_ENCODING config parameter if temporarily set above
|
|
1823
|
+
if override_shape_encoding:
|
|
1824
|
+
CPLSetThreadLocalConfigOption("SHAPE_ENCODING", prev_shape_encoding)
|
|
1825
|
+
|
|
1826
|
+
if prev_shape_encoding != NULL:
|
|
1827
|
+
CPLFree(<void*>prev_shape_encoding)
|
|
1828
|
+
|
|
1829
|
+
if is_vsimem:
|
|
1830
|
+
delete_vsimem_file(path)
|
|
1831
|
+
|
|
1554
1832
|
return meta
|
|
1555
1833
|
|
|
1556
1834
|
|
|
1557
|
-
def ogr_list_layers(
|
|
1835
|
+
def ogr_list_layers(object path_or_buffer):
|
|
1836
|
+
cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
|
|
1558
1837
|
cdef const char *path_c = NULL
|
|
1559
|
-
cdef const char *ogr_name = NULL
|
|
1560
1838
|
cdef OGRDataSourceH ogr_dataset = NULL
|
|
1561
|
-
cdef OGRLayerH ogr_layer = NULL
|
|
1562
1839
|
|
|
1563
|
-
|
|
1564
|
-
|
|
1840
|
+
try:
|
|
1841
|
+
path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer
|
|
1842
|
+
ogr_dataset = ogr_open(path.encode('UTF-8'), 0, NULL)
|
|
1843
|
+
layers = get_layer_names(ogr_dataset)
|
|
1844
|
+
|
|
1845
|
+
finally:
|
|
1846
|
+
if ogr_dataset != NULL:
|
|
1847
|
+
GDALClose(ogr_dataset)
|
|
1848
|
+
ogr_dataset = NULL
|
|
1849
|
+
|
|
1850
|
+
if is_vsimem:
|
|
1851
|
+
delete_vsimem_file(path)
|
|
1852
|
+
|
|
1853
|
+
return layers
|
|
1854
|
+
|
|
1855
|
+
|
|
1856
|
+
cdef str get_default_layer(OGRDataSourceH ogr_dataset):
|
|
1857
|
+
""" Get the layer in the dataset that is read by default.
|
|
1858
|
+
|
|
1859
|
+
The caller is responsible for closing the dataset.
|
|
1860
|
+
|
|
1861
|
+
Parameters
|
|
1862
|
+
----------
|
|
1863
|
+
ogr_dataset : pointer to open OGR dataset
|
|
1864
|
+
|
|
1865
|
+
Returns
|
|
1866
|
+
-------
|
|
1867
|
+
str
|
|
1868
|
+
the name of the default layer to be read.
|
|
1869
|
+
|
|
1870
|
+
"""
|
|
1871
|
+
layers = get_layer_names(ogr_dataset)
|
|
1872
|
+
first_layer_name = layers[0][0]
|
|
1873
|
+
|
|
1874
|
+
if len(layers) > 1:
|
|
1875
|
+
dataset_name = os.path.basename(get_string(OGR_DS_GetName(ogr_dataset)))
|
|
1876
|
+
|
|
1877
|
+
other_layer_names = ', '.join([f"'{l}'" for l in layers[1:, 0]])
|
|
1878
|
+
warnings.warn(
|
|
1879
|
+
f"More than one layer found in '{dataset_name}': '{first_layer_name}' "
|
|
1880
|
+
f"(default), {other_layer_names}. Specify layer parameter to avoid this "
|
|
1881
|
+
"warning.",
|
|
1882
|
+
stacklevel=2,
|
|
1883
|
+
)
|
|
1565
1884
|
|
|
1566
|
-
|
|
1885
|
+
return first_layer_name
|
|
1886
|
+
|
|
1887
|
+
|
|
1888
|
+
cdef get_layer_names(OGRDataSourceH ogr_dataset):
|
|
1889
|
+
""" Get the layers in the dataset.
|
|
1890
|
+
|
|
1891
|
+
The caller is responsible for closing the dataset.
|
|
1892
|
+
|
|
1893
|
+
Parameters
|
|
1894
|
+
----------
|
|
1895
|
+
ogr_dataset : pointer to open OGR dataset
|
|
1896
|
+
|
|
1897
|
+
Returns
|
|
1898
|
+
-------
|
|
1899
|
+
ndarray(n)
|
|
1900
|
+
array of layer names
|
|
1901
|
+
|
|
1902
|
+
"""
|
|
1903
|
+
cdef OGRLayerH ogr_layer = NULL
|
|
1567
1904
|
|
|
1568
1905
|
layer_count = GDALDatasetGetLayerCount(ogr_dataset)
|
|
1569
1906
|
|
|
@@ -1575,10 +1912,6 @@ def ogr_list_layers(str path):
|
|
|
1575
1912
|
data_view[i, 0] = get_string(OGR_L_GetName(ogr_layer))
|
|
1576
1913
|
data_view[i, 1] = get_geometry_type(ogr_layer)
|
|
1577
1914
|
|
|
1578
|
-
if ogr_dataset != NULL:
|
|
1579
|
-
GDALClose(ogr_dataset)
|
|
1580
|
-
ogr_dataset = NULL
|
|
1581
|
-
|
|
1582
1915
|
return data
|
|
1583
1916
|
|
|
1584
1917
|
|
|
@@ -1659,10 +1992,10 @@ cdef infer_field_types(list dtypes):
|
|
|
1659
1992
|
field_types_view[i, 0] = OFTString
|
|
1660
1993
|
# Convert to unicode string then take itemsize
|
|
1661
1994
|
# TODO: better implementation of this
|
|
1662
|
-
# width = values.astype(np.
|
|
1995
|
+
# width = values.astype(np.str_).dtype.itemsize // 4
|
|
1663
1996
|
# DO WE NEED WIDTH HERE?
|
|
1664
1997
|
|
|
1665
|
-
elif dtype.type is np.
|
|
1998
|
+
elif dtype.type is np.str_ or dtype.type is np.bytes_:
|
|
1666
1999
|
field_types_view[i, 0] = OFTString
|
|
1667
2000
|
field_types_view[i, 2] = int(dtype.itemsize // 4)
|
|
1668
2001
|
|
|
@@ -1679,14 +2012,49 @@ cdef infer_field_types(list dtypes):
|
|
|
1679
2012
|
return field_types
|
|
1680
2013
|
|
|
1681
2014
|
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
str
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
2015
|
+
cdef create_ogr_dataset_layer(
|
|
2016
|
+
str path,
|
|
2017
|
+
bint is_vsi,
|
|
2018
|
+
str layer,
|
|
2019
|
+
str driver,
|
|
2020
|
+
str crs,
|
|
2021
|
+
str geometry_type,
|
|
2022
|
+
str encoding,
|
|
2023
|
+
object dataset_kwargs,
|
|
2024
|
+
object layer_kwargs,
|
|
2025
|
+
bint append,
|
|
2026
|
+
dataset_metadata,
|
|
2027
|
+
layer_metadata,
|
|
2028
|
+
OGRDataSourceH* ogr_dataset_out,
|
|
2029
|
+
OGRLayerH* ogr_layer_out,
|
|
1689
2030
|
):
|
|
2031
|
+
"""
|
|
2032
|
+
Construct the OGRDataSource and OGRLayer objects based on input
|
|
2033
|
+
path and layer.
|
|
2034
|
+
|
|
2035
|
+
If the file already exists, will open the existing dataset and overwrite
|
|
2036
|
+
or append the layer (depending on `append`), otherwise will create a new
|
|
2037
|
+
dataset.
|
|
2038
|
+
|
|
2039
|
+
Fills in the `ogr_dataset_out` and `ogr_layer_out` pointers passed as
|
|
2040
|
+
parameter with initialized objects (or raise error is it fails to do so).
|
|
2041
|
+
It is the responsibility of the caller to clean up those objects after use.
|
|
2042
|
+
Returns whether a new layer was created or not (when the layer was created,
|
|
2043
|
+
the caller still needs to set up the layer definition, i.e. create the
|
|
2044
|
+
fields).
|
|
2045
|
+
|
|
2046
|
+
Parameters
|
|
2047
|
+
----------
|
|
2048
|
+
encoding : str
|
|
2049
|
+
Only used if `driver` is "ESRI Shapefile". If not None, it overrules the default
|
|
2050
|
+
shapefile encoding, which is "UTF-8" in pyogrio.
|
|
2051
|
+
|
|
2052
|
+
Returns
|
|
2053
|
+
-------
|
|
2054
|
+
bool :
|
|
2055
|
+
Whether a new layer was created, or False if we are appending to an
|
|
2056
|
+
existing layer.
|
|
2057
|
+
"""
|
|
1690
2058
|
cdef const char *path_c = NULL
|
|
1691
2059
|
cdef const char *layer_c = NULL
|
|
1692
2060
|
cdef const char *driver_c = NULL
|
|
@@ -1697,55 +2065,9 @@ def ogr_write(
|
|
|
1697
2065
|
cdef const char *ogr_name = NULL
|
|
1698
2066
|
cdef OGRDataSourceH ogr_dataset = NULL
|
|
1699
2067
|
cdef OGRLayerH ogr_layer = NULL
|
|
1700
|
-
cdef OGRFeatureH ogr_feature = NULL
|
|
1701
|
-
cdef OGRGeometryH ogr_geometry = NULL
|
|
1702
|
-
cdef OGRGeometryH ogr_geometry_multi = NULL
|
|
1703
|
-
cdef OGRFeatureDefnH ogr_featuredef = NULL
|
|
1704
|
-
cdef OGRFieldDefnH ogr_fielddef = NULL
|
|
1705
|
-
cdef unsigned char *wkb_buffer = NULL
|
|
1706
2068
|
cdef OGRSpatialReferenceH ogr_crs = NULL
|
|
1707
|
-
cdef int layer_idx = -1
|
|
1708
|
-
cdef int supports_transactions = 0
|
|
1709
2069
|
cdef OGRwkbGeometryType geometry_code
|
|
1710
|
-
cdef int
|
|
1711
|
-
cdef int i = 0
|
|
1712
|
-
cdef int num_records = -1
|
|
1713
|
-
cdef int num_field_data = len(field_data) if field_data is not None else 0
|
|
1714
|
-
cdef int num_fields = len(fields) if fields is not None else 0
|
|
1715
|
-
|
|
1716
|
-
if num_fields != num_field_data:
|
|
1717
|
-
raise ValueError("field_data array needs to be same length as fields array")
|
|
1718
|
-
|
|
1719
|
-
if num_fields == 0 and geometry is None:
|
|
1720
|
-
raise ValueError("You must provide at least a geometry column or a field")
|
|
1721
|
-
|
|
1722
|
-
if num_fields > 0:
|
|
1723
|
-
num_records = len(field_data[0])
|
|
1724
|
-
for i in range(1, len(field_data)):
|
|
1725
|
-
if len(field_data[i]) != num_records:
|
|
1726
|
-
raise ValueError("field_data arrays must be same length")
|
|
1727
|
-
|
|
1728
|
-
if geometry is None:
|
|
1729
|
-
# If no geometry data, we ignore the geometry_type and don't create a geometry
|
|
1730
|
-
# column
|
|
1731
|
-
geometry_type = None
|
|
1732
|
-
else:
|
|
1733
|
-
if num_fields > 0:
|
|
1734
|
-
if len(geometry) != num_records:
|
|
1735
|
-
raise ValueError(
|
|
1736
|
-
"field_data arrays must be same length as geometry array"
|
|
1737
|
-
)
|
|
1738
|
-
else:
|
|
1739
|
-
num_records = len(geometry)
|
|
1740
|
-
|
|
1741
|
-
if field_mask is not None:
|
|
1742
|
-
if len(field_data) != len(field_mask):
|
|
1743
|
-
raise ValueError("field_data and field_mask must be same length")
|
|
1744
|
-
for i in range(0, len(field_mask)):
|
|
1745
|
-
if field_mask[i] is not None and len(field_mask[i]) != num_records:
|
|
1746
|
-
raise ValueError("field_mask arrays must be same length as geometry array")
|
|
1747
|
-
else:
|
|
1748
|
-
field_mask = [None] * num_fields
|
|
2070
|
+
cdef int layer_idx = -1
|
|
1749
2071
|
|
|
1750
2072
|
path_b = path.encode('UTF-8')
|
|
1751
2073
|
path_c = path_b
|
|
@@ -1753,22 +2075,22 @@ def ogr_write(
|
|
|
1753
2075
|
driver_b = driver.encode('UTF-8')
|
|
1754
2076
|
driver_c = driver_b
|
|
1755
2077
|
|
|
2078
|
+
# in-memory dataset is always created from scratch
|
|
2079
|
+
path_exists = os.path.exists(path) if not is_vsi else False
|
|
2080
|
+
|
|
1756
2081
|
if not layer:
|
|
1757
2082
|
layer = os.path.splitext(os.path.split(path)[1])[0]
|
|
1758
2083
|
|
|
1759
|
-
if gdal_tz_offsets is None:
|
|
1760
|
-
gdal_tz_offsets = {}
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
2084
|
# if shapefile, GeoJSON, or FlatGeobuf, always delete first
|
|
1764
2085
|
# for other types, check if we can create layers
|
|
1765
2086
|
# GPKG might be the only multi-layer writeable type. TODO: check this
|
|
1766
|
-
if driver in ('ESRI Shapefile', 'GeoJSON', 'GeoJSONSeq', 'FlatGeobuf') and
|
|
2087
|
+
if driver in ('ESRI Shapefile', 'GeoJSON', 'GeoJSONSeq', 'FlatGeobuf') and path_exists:
|
|
1767
2088
|
if not append:
|
|
1768
2089
|
os.unlink(path)
|
|
2090
|
+
path_exists = False
|
|
1769
2091
|
|
|
1770
2092
|
layer_exists = False
|
|
1771
|
-
if
|
|
2093
|
+
if path_exists:
|
|
1772
2094
|
try:
|
|
1773
2095
|
ogr_dataset = ogr_open(path_c, 1, NULL)
|
|
1774
2096
|
|
|
@@ -1790,7 +2112,11 @@ def ogr_write(
|
|
|
1790
2112
|
raise exc
|
|
1791
2113
|
|
|
1792
2114
|
# otherwise create from scratch
|
|
1793
|
-
|
|
2115
|
+
if is_vsi:
|
|
2116
|
+
VSIUnlink(path_c)
|
|
2117
|
+
else:
|
|
2118
|
+
os.unlink(path)
|
|
2119
|
+
|
|
1794
2120
|
ogr_dataset = NULL
|
|
1795
2121
|
|
|
1796
2122
|
# either it didn't exist or could not open it in write mode
|
|
@@ -1808,25 +2134,20 @@ def ogr_write(
|
|
|
1808
2134
|
if crs is not None:
|
|
1809
2135
|
try:
|
|
1810
2136
|
ogr_crs = create_crs(crs)
|
|
2137
|
+
# force geographic CRS to use lon, lat order and ignore axis order specified by CRS, in order
|
|
2138
|
+
# to correctly write KML and GeoJSON coordinates in correct order
|
|
2139
|
+
OSRSetAxisMappingStrategy(ogr_crs, OAMS_TRADITIONAL_GIS_ORDER)
|
|
2140
|
+
|
|
1811
2141
|
|
|
1812
2142
|
except Exception as exc:
|
|
1813
|
-
OGRReleaseDataSource(ogr_dataset)
|
|
1814
|
-
ogr_dataset = NULL
|
|
1815
2143
|
if dataset_options != NULL:
|
|
1816
2144
|
CSLDestroy(dataset_options)
|
|
1817
2145
|
dataset_options = NULL
|
|
1818
|
-
raise exc
|
|
1819
2146
|
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
encoding = locale.getpreferredencoding()
|
|
2147
|
+
GDALClose(ogr_dataset)
|
|
2148
|
+
ogr_dataset = NULL
|
|
1823
2149
|
|
|
1824
|
-
|
|
1825
|
-
# Fiona only sets encoding for shapefiles; other drivers do not support
|
|
1826
|
-
# encoding as an option.
|
|
1827
|
-
encoding_b = encoding.upper().encode('UTF-8')
|
|
1828
|
-
encoding_c = encoding_b
|
|
1829
|
-
layer_options = CSLSetNameValue(layer_options, "ENCODING", encoding_c)
|
|
2150
|
+
raise exc
|
|
1830
2151
|
|
|
1831
2152
|
# Setup other layer creation options
|
|
1832
2153
|
for k, v in layer_kwargs.items():
|
|
@@ -1834,6 +2155,21 @@ def ogr_write(
|
|
|
1834
2155
|
v = v.encode('UTF-8')
|
|
1835
2156
|
layer_options = CSLAddNameValue(layer_options, <const char *>k, <const char *>v)
|
|
1836
2157
|
|
|
2158
|
+
if driver == 'ESRI Shapefile':
|
|
2159
|
+
# ENCODING option must be set for shapefiles to properly write *.cpg
|
|
2160
|
+
# file containing the encoding; this is not a supported option for
|
|
2161
|
+
# other drivers. This is done after setting general options above
|
|
2162
|
+
# to override ENCODING if passed by the user as a layer option.
|
|
2163
|
+
if encoding and "ENCODING" in layer_kwargs:
|
|
2164
|
+
raise ValueError('cannot provide both encoding parameter and "ENCODING" layer creation option; use the encoding parameter')
|
|
2165
|
+
|
|
2166
|
+
# always write to UTF-8 if encoding is not set
|
|
2167
|
+
encoding = encoding or "UTF-8"
|
|
2168
|
+
encoding_b = encoding.upper().encode('UTF-8')
|
|
2169
|
+
encoding_c = encoding_b
|
|
2170
|
+
layer_options = CSLSetNameValue(layer_options, "ENCODING", encoding_c)
|
|
2171
|
+
|
|
2172
|
+
|
|
1837
2173
|
### Get geometry type
|
|
1838
2174
|
# TODO: this is brittle for 3D / ZM / M types
|
|
1839
2175
|
# TODO: fail on M / ZM types
|
|
@@ -1856,7 +2192,7 @@ def ogr_write(
|
|
|
1856
2192
|
set_metadata(ogr_layer, layer_metadata)
|
|
1857
2193
|
|
|
1858
2194
|
except Exception as exc:
|
|
1859
|
-
|
|
2195
|
+
GDALClose(ogr_dataset)
|
|
1860
2196
|
ogr_dataset = NULL
|
|
1861
2197
|
raise DataLayerError(str(exc))
|
|
1862
2198
|
|
|
@@ -1873,60 +2209,152 @@ def ogr_write(
|
|
|
1873
2209
|
CSLDestroy(layer_options)
|
|
1874
2210
|
layer_options = NULL
|
|
1875
2211
|
|
|
1876
|
-
|
|
1877
|
-
|
|
2212
|
+
ogr_dataset_out[0] = ogr_dataset
|
|
2213
|
+
ogr_layer_out[0] = ogr_layer
|
|
2214
|
+
|
|
2215
|
+
return create_layer
|
|
2216
|
+
|
|
2217
|
+
|
|
2218
|
+
# TODO: set geometry and field data as memory views?
|
|
2219
|
+
def ogr_write(
|
|
2220
|
+
object path_or_fp,
|
|
2221
|
+
str layer,
|
|
2222
|
+
str driver,
|
|
2223
|
+
geometry,
|
|
2224
|
+
fields,
|
|
2225
|
+
field_data,
|
|
2226
|
+
field_mask,
|
|
2227
|
+
str crs,
|
|
2228
|
+
str geometry_type,
|
|
2229
|
+
str encoding,
|
|
2230
|
+
object dataset_kwargs,
|
|
2231
|
+
object layer_kwargs,
|
|
2232
|
+
bint promote_to_multi=False,
|
|
2233
|
+
bint nan_as_null=True,
|
|
2234
|
+
bint append=False,
|
|
2235
|
+
dataset_metadata=None,
|
|
2236
|
+
layer_metadata=None,
|
|
2237
|
+
gdal_tz_offsets=None
|
|
2238
|
+
):
|
|
2239
|
+
cdef OGRDataSourceH ogr_dataset = NULL
|
|
2240
|
+
cdef OGRLayerH ogr_layer = NULL
|
|
2241
|
+
cdef OGRFeatureH ogr_feature = NULL
|
|
2242
|
+
cdef OGRGeometryH ogr_geometry = NULL
|
|
2243
|
+
cdef OGRGeometryH ogr_geometry_multi = NULL
|
|
2244
|
+
cdef OGRFeatureDefnH ogr_featuredef = NULL
|
|
2245
|
+
cdef OGRFieldDefnH ogr_fielddef = NULL
|
|
2246
|
+
cdef unsigned char *wkb_buffer = NULL
|
|
2247
|
+
cdef int supports_transactions = 0
|
|
2248
|
+
cdef int err = 0
|
|
2249
|
+
cdef int i = 0
|
|
2250
|
+
cdef int num_records = -1
|
|
2251
|
+
cdef int num_field_data = len(field_data) if field_data is not None else 0
|
|
2252
|
+
cdef int num_fields = len(fields) if fields is not None else 0
|
|
2253
|
+
cdef bint is_vsi = False
|
|
2254
|
+
|
|
2255
|
+
if num_fields != num_field_data:
|
|
2256
|
+
raise ValueError("field_data array needs to be same length as fields array")
|
|
2257
|
+
|
|
2258
|
+
if num_fields == 0 and geometry is None:
|
|
2259
|
+
raise ValueError("You must provide at least a geometry column or a field")
|
|
2260
|
+
|
|
1878
2261
|
if num_fields > 0:
|
|
1879
|
-
|
|
2262
|
+
num_records = len(field_data[0])
|
|
2263
|
+
for i in range(1, len(field_data)):
|
|
2264
|
+
if len(field_data[i]) != num_records:
|
|
2265
|
+
raise ValueError("field_data arrays must be same length")
|
|
1880
2266
|
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
|
|
2267
|
+
if geometry is None:
|
|
2268
|
+
# If no geometry data, we ignore the geometry_type and don't create a geometry
|
|
2269
|
+
# column
|
|
2270
|
+
geometry_type = None
|
|
2271
|
+
else:
|
|
2272
|
+
if num_fields > 0:
|
|
2273
|
+
if len(geometry) != num_records:
|
|
2274
|
+
raise ValueError(
|
|
2275
|
+
"field_data arrays must be same length as geometry array"
|
|
2276
|
+
)
|
|
2277
|
+
else:
|
|
2278
|
+
num_records = len(geometry)
|
|
1885
2279
|
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
2280
|
+
if field_mask is not None:
|
|
2281
|
+
if len(field_data) != len(field_mask):
|
|
2282
|
+
raise ValueError("field_data and field_mask must be same length")
|
|
2283
|
+
for i in range(0, len(field_mask)):
|
|
2284
|
+
if field_mask[i] is not None and len(field_mask[i]) != num_records:
|
|
2285
|
+
raise ValueError("field_mask arrays must be same length as geometry array")
|
|
2286
|
+
else:
|
|
2287
|
+
field_mask = [None] * num_fields
|
|
1889
2288
|
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
OGR_Fld_SetSubType(ogr_fielddef, field_subtype)
|
|
2289
|
+
if gdal_tz_offsets is None:
|
|
2290
|
+
gdal_tz_offsets = {}
|
|
1893
2291
|
|
|
1894
|
-
|
|
1895
|
-
|
|
2292
|
+
try:
|
|
2293
|
+
# Setup in-memory handler if needed
|
|
2294
|
+
path = get_ogr_vsimem_write_path(path_or_fp, driver)
|
|
2295
|
+
is_vsi = path.startswith('/vsimem/')
|
|
2296
|
+
|
|
2297
|
+
# Setup dataset and layer
|
|
2298
|
+
layer_created = create_ogr_dataset_layer(
|
|
2299
|
+
path, is_vsi, layer, driver, crs, geometry_type, encoding,
|
|
2300
|
+
dataset_kwargs, layer_kwargs, append,
|
|
2301
|
+
dataset_metadata, layer_metadata,
|
|
2302
|
+
&ogr_dataset, &ogr_layer,
|
|
2303
|
+
)
|
|
1896
2304
|
|
|
1897
|
-
|
|
2305
|
+
if driver == 'ESRI Shapefile':
|
|
2306
|
+
# force encoding for remaining operations to be in UTF-8 (even if user
|
|
2307
|
+
# provides an encoding) because GDAL will automatically convert those to
|
|
2308
|
+
# the target encoding because ENCODING is set as a layer creation option
|
|
2309
|
+
encoding = "UTF-8"
|
|
1898
2310
|
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
2311
|
+
else:
|
|
2312
|
+
# Now the dataset and layer have been created, we can properly determine the
|
|
2313
|
+
# encoding. It is derived from the user, from the dataset capabilities / type,
|
|
2314
|
+
# or from the system locale
|
|
2315
|
+
encoding = encoding or detect_encoding(ogr_dataset, ogr_layer)
|
|
1903
2316
|
|
|
1904
|
-
|
|
1905
|
-
|
|
1906
|
-
|
|
2317
|
+
### Create the fields
|
|
2318
|
+
field_types = None
|
|
2319
|
+
if num_fields > 0:
|
|
2320
|
+
field_types = infer_field_types([field.dtype for field in field_data])
|
|
1907
2321
|
|
|
1908
|
-
|
|
1909
|
-
|
|
2322
|
+
if layer_created:
|
|
2323
|
+
for i in range(num_fields):
|
|
2324
|
+
field_type, field_subtype, width, precision = field_types[i]
|
|
1910
2325
|
|
|
1911
|
-
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
raise FieldError(f"Error adding field '{fields[i]}' to layer") from None
|
|
2326
|
+
name_b = fields[i].encode(encoding)
|
|
2327
|
+
try:
|
|
2328
|
+
ogr_fielddef = exc_wrap_pointer(OGR_Fld_Create(name_b, field_type))
|
|
1915
2329
|
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
2330
|
+
# subtypes, see: https://gdal.org/development/rfc/rfc50_ogr_field_subtype.html
|
|
2331
|
+
if field_subtype != OFSTNone:
|
|
2332
|
+
OGR_Fld_SetSubType(ogr_fielddef, field_subtype)
|
|
1919
2333
|
|
|
2334
|
+
if width:
|
|
2335
|
+
OGR_Fld_SetWidth(ogr_fielddef, width)
|
|
1920
2336
|
|
|
1921
|
-
|
|
1922
|
-
ogr_featuredef = OGR_L_GetLayerDefn(ogr_layer)
|
|
2337
|
+
# TODO: set precision
|
|
1923
2338
|
|
|
1924
|
-
|
|
1925
|
-
if supports_transactions:
|
|
1926
|
-
start_transaction(ogr_dataset, 0)
|
|
2339
|
+
exc_wrap_int(OGR_L_CreateField(ogr_layer, ogr_fielddef, 1))
|
|
1927
2340
|
|
|
1928
|
-
|
|
1929
|
-
|
|
2341
|
+
except:
|
|
2342
|
+
raise FieldError(f"Error adding field '{fields[i]}' to layer") from None
|
|
2343
|
+
|
|
2344
|
+
finally:
|
|
2345
|
+
if ogr_fielddef != NULL:
|
|
2346
|
+
OGR_Fld_Destroy(ogr_fielddef)
|
|
2347
|
+
ogr_fielddef = NULL
|
|
2348
|
+
|
|
2349
|
+
|
|
2350
|
+
### Create the features
|
|
2351
|
+
ogr_featuredef = OGR_L_GetLayerDefn(ogr_layer)
|
|
2352
|
+
|
|
2353
|
+
supports_transactions = OGR_L_TestCapability(ogr_layer, OLCTransactions)
|
|
2354
|
+
if supports_transactions:
|
|
2355
|
+
start_transaction(ogr_dataset, 0)
|
|
2356
|
+
|
|
2357
|
+
for i in range(num_records):
|
|
1930
2358
|
# create the feature
|
|
1931
2359
|
ogr_feature = OGR_F_Create(ogr_featuredef)
|
|
1932
2360
|
if ogr_feature == NULL:
|
|
@@ -1947,9 +2375,6 @@ def ogr_write(
|
|
|
1947
2375
|
wkb_buffer = wkb
|
|
1948
2376
|
err = OGR_G_ImportFromWkb(ogr_geometry, wkb_buffer, len(wkb))
|
|
1949
2377
|
if err:
|
|
1950
|
-
if ogr_geometry != NULL:
|
|
1951
|
-
OGR_G_DestroyGeometry(ogr_geometry)
|
|
1952
|
-
ogr_geometry = NULL
|
|
1953
2378
|
raise GeometryError(f"Could not create geometry from WKB at index {i}") from None
|
|
1954
2379
|
|
|
1955
2380
|
# Convert to multi type
|
|
@@ -1964,6 +2389,7 @@ def ogr_write(
|
|
|
1964
2389
|
# Set the geometry on the feature
|
|
1965
2390
|
# this assumes ownership of the geometry and it's cleanup
|
|
1966
2391
|
err = OGR_F_SetGeometryDirectly(ogr_feature, ogr_geometry)
|
|
2392
|
+
ogr_geometry = NULL # to prevent cleanup after this point
|
|
1967
2393
|
if err:
|
|
1968
2394
|
raise GeometryError(f"Could not set geometry for feature at index {i}") from None
|
|
1969
2395
|
|
|
@@ -1977,7 +2403,6 @@ def ogr_write(
|
|
|
1977
2403
|
OGR_F_SetFieldNull(ogr_feature, field_idx)
|
|
1978
2404
|
|
|
1979
2405
|
elif field_type == OFTString:
|
|
1980
|
-
# TODO: encode string using approach from _get_internal_encoding which checks layer capabilities
|
|
1981
2406
|
if (
|
|
1982
2407
|
field_value is None
|
|
1983
2408
|
or (isinstance(field_value, float) and isnan(field_value))
|
|
@@ -1989,7 +2414,7 @@ def ogr_write(
|
|
|
1989
2414
|
field_value = str(field_value)
|
|
1990
2415
|
|
|
1991
2416
|
try:
|
|
1992
|
-
value_b = field_value.encode(
|
|
2417
|
+
value_b = field_value.encode(encoding)
|
|
1993
2418
|
OGR_F_SetFieldString(ogr_feature, field_idx, value_b)
|
|
1994
2419
|
|
|
1995
2420
|
except AttributeError:
|
|
@@ -2056,25 +2481,262 @@ def ogr_write(
|
|
|
2056
2481
|
# Add feature to the layer
|
|
2057
2482
|
try:
|
|
2058
2483
|
exc_wrap_int(OGR_L_CreateFeature(ogr_layer, ogr_feature))
|
|
2484
|
+
|
|
2059
2485
|
except CPLE_BaseError as exc:
|
|
2060
2486
|
raise FeatureError(f"Could not add feature to layer at index {i}: {exc}") from None
|
|
2061
2487
|
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
OGR_F_Destroy(ogr_feature)
|
|
2065
|
-
ogr_feature = NULL
|
|
2488
|
+
OGR_F_Destroy(ogr_feature)
|
|
2489
|
+
ogr_feature = NULL
|
|
2066
2490
|
|
|
2067
|
-
if supports_transactions:
|
|
2068
|
-
commit_transaction(ogr_dataset)
|
|
2069
2491
|
|
|
2070
|
-
|
|
2492
|
+
if supports_transactions:
|
|
2493
|
+
commit_transaction(ogr_dataset)
|
|
2071
2494
|
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2495
|
+
log.info(f"Created {num_records:,} records" )
|
|
2496
|
+
|
|
2497
|
+
# close dataset to force driver to flush data
|
|
2498
|
+
exc = ogr_close(ogr_dataset)
|
|
2499
|
+
ogr_dataset = NULL
|
|
2500
|
+
if exc:
|
|
2501
|
+
raise DataSourceError(f"Failed to write features to dataset {path}; {exc}")
|
|
2502
|
+
|
|
2503
|
+
# copy in-memory file back to path_or_fp object
|
|
2504
|
+
if is_vsi:
|
|
2505
|
+
read_vsimem_to_buffer(path, path_or_fp)
|
|
2506
|
+
|
|
2507
|
+
finally:
|
|
2508
|
+
### Final cleanup
|
|
2509
|
+
# make sure that all objects allocated above are released if exceptions
|
|
2510
|
+
# are raised, and the dataset is closed
|
|
2511
|
+
if ogr_fielddef != NULL:
|
|
2512
|
+
OGR_Fld_Destroy(ogr_fielddef)
|
|
2513
|
+
ogr_fielddef = NULL
|
|
2514
|
+
|
|
2515
|
+
if ogr_feature != NULL:
|
|
2516
|
+
OGR_F_Destroy(ogr_feature)
|
|
2517
|
+
ogr_feature = NULL
|
|
2518
|
+
|
|
2519
|
+
if ogr_geometry != NULL:
|
|
2520
|
+
OGR_G_DestroyGeometry(ogr_geometry)
|
|
2521
|
+
ogr_geometry = NULL
|
|
2522
|
+
|
|
2523
|
+
if ogr_dataset != NULL:
|
|
2524
|
+
ogr_close(ogr_dataset)
|
|
2525
|
+
|
|
2526
|
+
if is_vsi:
|
|
2527
|
+
delete_vsimem_file(path)
|
|
2528
|
+
|
|
2529
|
+
|
|
2530
|
+
def ogr_write_arrow(
    object path_or_fp,
    str layer,
    str driver,
    object arrow_obj,
    str crs,
    str geometry_type,
    str geometry_name,
    str encoding,
    object dataset_kwargs,
    object layer_kwargs,
    bint append=False,
    dataset_metadata=None,
    layer_metadata=None,
):
    """Write every record batch of an Arrow C stream to an OGR layer.

    The dataset and layer are obtained through ``create_ogr_dataset_layer()``.
    When that call returns a truthy value (stored as ``layer_created``), the
    attribute fields are first created from the Arrow schema.  Batches are
    then pulled from the stream one at a time and handed to
    ``OGR_L_WriteArrowBatch`` until the stream is exhausted, after which the
    dataset is closed to force the driver to flush its data.

    Parameters
    ----------
    path_or_fp : object
        Destination, resolved to a path by ``get_ogr_vsimem_write_path()``.
        If the resolved path starts with ``/vsimem/``, the in-memory file is
        copied back into this object after writing and deleted afterwards.
    layer, driver, crs, geometry_type : str
        Forwarded to ``create_ogr_dataset_layer()``.
    arrow_obj : object
        Object exposing ``__arrow_c_stream__()``, which must return a
        PyCapsule named ``"arrow_array_stream"``.
    geometry_name : str
        If truthy, passed to GDAL as the ``GEOMETRY_NAME`` option; a column
        with this name is not created as an attribute field.
    encoding : str
        Anything other than UTF-8 is rejected unless ``driver`` is
        ``'ESRI Shapefile'``.
    dataset_kwargs, layer_kwargs : object
        Forwarded to ``create_ogr_dataset_layer()``.
    append : bool, default False
        Forwarded to ``create_ogr_dataset_layer()``.
    dataset_metadata, layer_metadata : optional
        Forwarded to ``create_ogr_dataset_layer()``.

    Raises
    ------
    RuntimeError
        If GDAL is older than 3.8, or the Arrow stream is invalid, already
        released, or fails to deliver its schema or a batch.
    ValueError
        If a non-UTF-8 encoding is requested for a driver other than
        ``'ESRI Shapefile'``.
    DataLayerError
        If GDAL fails to write a batch.
    DataSourceError
        If closing (flushing) the dataset reports an error.
    """
    IF CTE_GDAL_VERSION < (3, 8, 0):
        raise RuntimeError("Need GDAL>=3.8 for Arrow write support")

    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef char **options = NULL
    cdef bint is_vsi = False
    cdef ArrowArrayStream* stream = NULL
    cdef ArrowSchema schema
    cdef ArrowArray array

    # mark both structs as empty so the cleanup below can tell whether they
    # were ever populated
    schema.release = NULL
    array.release = NULL

    # only shapefile supports non-UTF encoding because ENCODING option is set
    # during dataset creation and GDAL auto-translates from UTF-8 values to that
    # encoding
    # NOTE: checked before anything is created or opened, so that an
    # unsupported encoding is rejected without touching the destination
    if encoding and encoding.replace('-','').upper() != 'UTF8' and driver != 'ESRI Shapefile':
        raise ValueError("non-UTF-8 encoding is not supported for Arrow; use the non-Arrow interface instead")

    try:
        path = get_ogr_vsimem_write_path(path_or_fp, driver)
        is_vsi = path.startswith('/vsimem/')

        layer_created = create_ogr_dataset_layer(
            path, is_vsi, layer, driver, crs, geometry_type, encoding,
            dataset_kwargs, layer_kwargs, append,
            dataset_metadata, layer_metadata,
            &ogr_dataset, &ogr_layer,
        )

        if geometry_name:
            opts = {"GEOMETRY_NAME": geometry_name}
        else:
            opts = {}

        options = dict_to_options(opts)

        stream_capsule = arrow_obj.__arrow_c_stream__()
        stream = <ArrowArrayStream*>PyCapsule_GetPointer(
            stream_capsule, "arrow_array_stream"
        )

        if stream == NULL:
            raise RuntimeError("Could not extract valid Arrow array stream.")

        if stream.release == NULL:
            raise RuntimeError("Arrow array stream was already released.")

        if stream.get_schema(stream, &schema) != 0:
            raise RuntimeError("Could not get Arrow schema from stream.")

        if layer_created:
            create_fields_from_arrow_schema(ogr_layer, &schema, options, geometry_name)

        while True:
            if stream.get_next(stream, &array) != 0:
                raise RuntimeError("Error while accessing batch from stream.")

            # We've reached the end of the stream
            if array.release == NULL:
                break

            if not OGR_L_WriteArrowBatch(ogr_layer, &schema, &array, options):
                exc = exc_check()
                gdal_msg = f": {str(exc)}" if exc else "."
                raise DataLayerError(
                    f"Error while writing batch to OGR layer{gdal_msg}"
                )

            # release the batch unless its release callback was already cleared
            if array.release != NULL:
                array.release(&array)

        # close dataset to force driver to flush data
        exc = ogr_close(ogr_dataset)
        ogr_dataset = NULL
        if exc:
            raise DataSourceError(f"Failed to write features to dataset {path}; {exc}")

        # copy in-memory file back to path_or_fp object
        if is_vsi:
            read_vsimem_to_buffer(path, path_or_fp)

    finally:
        # release everything acquired above, whether or not an error occurred
        if stream != NULL and stream.release != NULL:
            stream.release(stream)

        if schema.release != NULL:
            schema.release(&schema)

        if array.release != NULL:
            array.release(&array)

        if options != NULL:
            CSLDestroy(options)
            options = NULL

        # only still set if an error occurred before the regular close above
        if ogr_dataset != NULL:
            ogr_close(ogr_dataset)
            ogr_dataset = NULL

        if is_vsi:
            delete_vsimem_file(path)
|
|
2647
|
+
|
|
2648
|
+
|
|
2649
|
+
cdef get_arrow_extension_metadata(const ArrowSchema* schema):
    """
    Parse the metadata of the ArrowSchema and extract extension type
    metadata (extension name and metadata).

    For the exact layout of the bytes, see
    https://arrow.apache.org/docs/dev/format/CDataInterface.html#c.ArrowSchema.metadata

    Returns
    -------
    tuple
        ``(extension_name, extension_metadata)``: the raw bytes stored under
        the ``ARROW:extension:name`` and ``ARROW:extension:metadata`` keys,
        each ``None`` when the key is absent or the schema has no metadata.
    """
    cdef const char *metadata = schema.metadata

    extension_name = None
    extension_metadata = None

    if metadata == NULL:
        return extension_name, extension_metadata

    # the number of metadata key/value pairs is stored
    # as an int32 value in the first 4 bytes; decode it as signed, like the
    # lengths below, so a negative count yields an empty loop instead of being
    # read as a huge positive number
    n = int.from_bytes(metadata[:4], byteorder=sys.byteorder, signed=True)
    pos = 4

    for _ in range(n):
        # for each metadata key/value pair, the first 4 bytes is the byte length
        # of the key as an int32, then follows the key (not null-terminated),
        # and then the same for the value length and bytes
        key_length = int.from_bytes(
            metadata[pos:pos+4], byteorder=sys.byteorder, signed=True
        )
        pos += 4
        key = metadata[pos:pos+key_length]
        pos += key_length
        value_length = int.from_bytes(
            metadata[pos:pos+4], byteorder=sys.byteorder, signed=True
        )
        pos += 4
        value = metadata[pos:pos+value_length]
        pos += value_length

        if key == b"ARROW:extension:name":
            extension_name = value
        elif key == b"ARROW:extension:metadata":
            extension_metadata = value

        # stop scanning as soon as both entries of interest were found
        if extension_name is not None and extension_metadata is not None:
            break

    return extension_name, extension_metadata
|
|
2696
|
+
|
|
2697
|
+
|
|
2698
|
+
cdef is_arrow_geometry_field(const ArrowSchema* schema):
    """Tell whether an Arrow field is a WKB geometry column.

    Returns True when the field's extension name is ``geoarrow.wkb`` or
    ``ogc.wkb`` and False when it has no extension name or an unrelated one.

    Raises
    ------
    NotImplementedError
        If the extension name is any other ``geoarrow.*`` type.
    """
    extension_name, _ignored = get_arrow_extension_metadata(schema)

    # no extension type at all: not a geometry field
    if extension_name is None:
        return False

    if extension_name in (b"geoarrow.wkb", b"ogc.wkb"):
        return True

    # any other geoarrow encoding is recognised but cannot be written
    if extension_name.startswith(b"geoarrow."):
        raise NotImplementedError(
            f"Writing a geometry column of type {extension_name.decode()} is not yet "
            "supported. Only WKB is currently supported ('geoarrow.wkb' or "
            "'ogc.wkb' types)."
        )

    return False
|
|
2713
|
+
|
|
2714
|
+
|
|
2715
|
+
cdef create_fields_from_arrow_schema(
    OGRLayerH destLayer, const ArrowSchema* schema, char** options, str geometry_name
):
    """Create the attribute fields of destLayer from an Arrow schema.

    Every child of ``schema`` is passed to
    ``OGR_L_CreateFieldFromArrowSchema`` together with ``options``, except
    children named ``geometry_name`` and children that
    ``is_arrow_geometry_field()`` reports as geometry.

    Raises
    ------
    RuntimeError
        If GDAL is older than 3.8 or the schema contains a NULL child.
    FieldError
        If GDAL fails to create a field.
    """

    IF CTE_GDAL_VERSION < (3, 8, 0):
        raise RuntimeError("Need GDAL>=3.8 for Arrow write support")

    # Each child of the top-level struct schema describes one column.
    cdef ArrowSchema* column
    for idx in range(schema.n_children):
        column = schema.children[idx]

        if column == NULL:
            raise RuntimeError("Received invalid Arrow schema (null child)")

        # The geometry column is not created as an attribute field.
        if get_string(column.name) == geometry_name:
            continue
        if is_arrow_geometry_field(column):
            continue

        # Nothing more to do for this column when GDAL accepts it.
        if OGR_L_CreateFieldFromArrowSchema(destLayer, column, options):
            continue

        gdal_error = exc_check()
        detail = f" ({str(gdal_error)})" if gdal_error else ""
        raise FieldError(
            f"Error while creating field from Arrow for field {idx} with name "
            f"'{get_string(column.name)}' and type {get_string(column.format)}"
            f"{detail}."
        )
|