pyogrio 0.9.0__cp39-cp39-manylinux_2_28_aarch64.whl → 0.11.0__cp39-cp39-manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyogrio might be problematic. Click here for more details.
- pyogrio/__init__.py +28 -21
- pyogrio/_compat.py +15 -1
- pyogrio/_env.py +4 -6
- pyogrio/_err.cpython-39-aarch64-linux-gnu.so +0 -0
- pyogrio/_geometry.cpython-39-aarch64-linux-gnu.so +0 -0
- pyogrio/_io.cpython-39-aarch64-linux-gnu.so +0 -0
- pyogrio/_ogr.cpython-39-aarch64-linux-gnu.so +0 -0
- pyogrio/_version.py +3 -3
- pyogrio/_vsi.cpython-39-aarch64-linux-gnu.so +0 -0
- pyogrio/core.py +86 -20
- pyogrio/errors.py +9 -16
- pyogrio/gdal_data/GDAL-targets-release.cmake +3 -3
- pyogrio/gdal_data/GDAL-targets.cmake +2 -2
- pyogrio/gdal_data/GDALConfig.cmake +0 -1
- pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
- pyogrio/gdal_data/MM_m_idofic.csv +321 -0
- pyogrio/gdal_data/gdalinfo_output.schema.json +3 -3
- pyogrio/gdal_data/gdaltileindex.xsd +253 -0
- pyogrio/gdal_data/gdalvrt.xsd +178 -63
- pyogrio/gdal_data/nitf_spec.xml +1 -17
- pyogrio/gdal_data/nitf_spec.xsd +1 -17
- pyogrio/gdal_data/ogrinfo_output.schema.json +23 -0
- pyogrio/gdal_data/ogrvrt.xsd +4 -17
- pyogrio/gdal_data/osmconf.ini +3 -1
- pyogrio/gdal_data/pci_datum.txt +222 -155
- pyogrio/gdal_data/pci_ellips.txt +90 -38
- pyogrio/gdal_data/pdfcomposition.xsd +1 -17
- pyogrio/gdal_data/vcpkg.spdx.json +32 -27
- pyogrio/gdal_data/vcpkg_abi_info.txt +30 -29
- pyogrio/gdal_data/vdv452.xml +1 -17
- pyogrio/gdal_data/vdv452.xsd +1 -17
- pyogrio/geopandas.py +122 -66
- pyogrio/proj_data/ITRF2014 +1 -1
- pyogrio/proj_data/ITRF2020 +91 -0
- pyogrio/proj_data/proj-config-version.cmake +2 -2
- pyogrio/proj_data/proj-config.cmake +1 -1
- pyogrio/proj_data/proj-targets.cmake +3 -3
- pyogrio/proj_data/proj.db +0 -0
- pyogrio/proj_data/proj.ini +11 -3
- pyogrio/proj_data/proj4-targets.cmake +3 -3
- pyogrio/proj_data/projjson.schema.json +1 -1
- pyogrio/proj_data/usage +7 -2
- pyogrio/proj_data/vcpkg.spdx.json +27 -22
- pyogrio/proj_data/vcpkg_abi_info.txt +17 -16
- pyogrio/raw.py +46 -30
- pyogrio/tests/conftest.py +214 -12
- pyogrio/tests/fixtures/README.md +32 -13
- pyogrio/tests/fixtures/curve.gpkg +0 -0
- pyogrio/tests/fixtures/{test_multisurface.gpkg → curvepolygon.gpkg} +0 -0
- pyogrio/tests/fixtures/line_zm.gpkg +0 -0
- pyogrio/tests/fixtures/multisurface.gpkg +0 -0
- pyogrio/tests/test_arrow.py +181 -24
- pyogrio/tests/test_core.py +170 -76
- pyogrio/tests/test_geopandas_io.py +483 -135
- pyogrio/tests/test_path.py +39 -17
- pyogrio/tests/test_raw_io.py +170 -55
- pyogrio/tests/test_util.py +56 -0
- pyogrio/util.py +69 -32
- pyogrio-0.11.0.dist-info/METADATA +124 -0
- {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info}/RECORD +200 -214
- {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info}/WHEEL +1 -1
- {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info/licenses}/LICENSE +1 -1
- pyogrio.libs/{libgdal-6ff0914e.so.34.3.8.5 → libgdal-4bc0d15f.so.36.3.10.3} +0 -0
- pyogrio/_err.pxd +0 -4
- pyogrio/_err.pyx +0 -250
- pyogrio/_geometry.pxd +0 -4
- pyogrio/_geometry.pyx +0 -129
- pyogrio/_io.pxd +0 -0
- pyogrio/_io.pyx +0 -2742
- pyogrio/_ogr.pxd +0 -444
- pyogrio/_ogr.pyx +0 -346
- pyogrio/_vsi.pxd +0 -4
- pyogrio/_vsi.pyx +0 -140
- pyogrio/arrow_bridge.h +0 -115
- pyogrio/gdal_data/bag_template.xml +0 -201
- pyogrio/gdal_data/gmlasconf.xml +0 -169
- pyogrio/gdal_data/gmlasconf.xsd +0 -1066
- pyogrio/gdal_data/netcdf_config.xsd +0 -143
- pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
- pyogrio/tests/fixtures/test_datetime.geojson +0 -7
- pyogrio/tests/fixtures/test_datetime_tz.geojson +0 -8
- pyogrio/tests/fixtures/test_fgdb.gdb.zip +0 -0
- pyogrio/tests/fixtures/test_nested.geojson +0 -18
- pyogrio/tests/fixtures/test_ogr_types_list.geojson +0 -12
- pyogrio-0.9.0.dist-info/METADATA +0 -100
- {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info}/top_level.txt +0 -0
pyogrio/_io.pyx
DELETED
|
@@ -1,2742 +0,0 @@
|
|
|
1
|
-
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
|
|
2
|
-
|
|
3
|
-
"""IO support for OGR vector data sources
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
import contextlib
|
|
8
|
-
import datetime
|
|
9
|
-
import locale
|
|
10
|
-
import logging
|
|
11
|
-
import math
|
|
12
|
-
import os
|
|
13
|
-
import sys
|
|
14
|
-
import warnings
|
|
15
|
-
|
|
16
|
-
from libc.stdint cimport uint8_t, uintptr_t
|
|
17
|
-
from libc.stdlib cimport malloc, free
|
|
18
|
-
from libc.string cimport strlen
|
|
19
|
-
from libc.math cimport isnan
|
|
20
|
-
from cpython.pycapsule cimport PyCapsule_GetPointer
|
|
21
|
-
|
|
22
|
-
cimport cython
|
|
23
|
-
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
|
|
24
|
-
|
|
25
|
-
import numpy as np
|
|
26
|
-
|
|
27
|
-
from pyogrio._ogr cimport *
|
|
28
|
-
from pyogrio._err cimport *
|
|
29
|
-
from pyogrio._vsi cimport *
|
|
30
|
-
from pyogrio._err import CPLE_BaseError, CPLE_NotSupportedError, NullPointerError
|
|
31
|
-
from pyogrio._geometry cimport get_geometry_type, get_geometry_type_code
|
|
32
|
-
from pyogrio.errors import CRSError, DataSourceError, DataLayerError, GeometryError, FieldError, FeatureError
|
|
33
|
-
|
|
34
|
-
log = logging.getLogger(__name__)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
# Lookup table from OGR field type code to numpy dtype name.  The position of
# an entry in the list is the integer OGR field type; ``None`` marks a type
# that has no numpy representation here.
FIELD_TYPES = [
    "int32",           # OFTInteger: simple 32-bit integer
    None,              # OFTIntegerList: list of 32-bit integers (not supported)
    "float64",         # OFTReal: double precision floating point
    None,              # OFTRealList: list of doubles (not supported)
    "object",          # OFTString: string of UTF-8 chars
    None,              # OFTStringList: array of strings (not supported)
    None,              # OFTWideString: deprecated (not supported)
    None,              # OFTWideStringList: deprecated (not supported)
    "object",          # OFTBinary: raw binary data
    "datetime64[D]",   # OFTDate: date
    None,              # OFTTime: time; not directly supported in numpy
    "datetime64[ms]",  # OFTDateTime: date and time
    "int64",           # OFTInteger64: single 64-bit integer
    None,              # OFTInteger64List: list of 64-bit integers (not supported)
]
|
|
55
|
-
|
|
56
|
-
# Lookup table from OGR field subtype to the numpy dtype name used for that
# subtype; ``None`` means the subtype does not select a dtype of its own.
FIELD_SUBTYPES = {
    OFSTNone: None,          # no subtype
    OFSTBoolean: "bool",     # boolean integer
    OFSTInt16: "int16",      # signed 16-bit integer
    OFSTFloat32: "float32",  # single precision (32 bit) floating point
}
|
|
62
|
-
|
|
63
|
-
# Lookup table from numpy dtype name to the (OGR field type, OGR field subtype)
# pair used for that dtype.
DTYPE_OGR_FIELD_TYPES = {
    # signed integers
    "int8": (OFTInteger, OFSTInt16),
    "int16": (OFTInteger, OFSTInt16),
    "int32": (OFTInteger, OFSTNone),
    "int": (OFTInteger64, OFSTNone),
    "int64": (OFTInteger64, OFSTNone),

    # unsigned integers have no OGR equivalent and are stored in the next
    # largest signed integer size
    "uint8": (OFTInteger, OFSTInt16),
    "uint16": (OFTInteger, OFSTNone),
    "uint32": (OFTInteger64, OFSTNone),
    # TODO: these might get truncated, check maximum value and raise error
    "uint": (OFTInteger64, OFSTNone),
    "uint64": (OFTInteger64, OFSTNone),

    # booleans are stored as integers carrying the boolean subtype
    "bool": (OFTInteger, OFSTBoolean),

    # floating point
    "float32": (OFTReal, OFSTFloat32),
    "float": (OFTReal, OFSTNone),
    "float64": (OFTReal, OFSTNone),

    # dates and datetimes
    "datetime64[D]": (OFTDate, OFSTNone),
    "datetime64": (OFTDateTime, OFSTNone),
}
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
cdef int start_transaction(OGRDataSourceH ogr_dataset, int force) except 1:
    """Start a transaction on a dataset.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset
    force : int
        passed through unchanged to GDALDatasetStartTransaction()

    Returns
    -------
    int
        0 on success

    Raises
    ------
    DataSourceError
        if GDAL reports OGRERR_FAILURE
    """
    cdef int status = GDALDatasetStartTransaction(ogr_dataset, force)

    if status != OGRERR_FAILURE:
        return 0

    raise DataSourceError("Failed to start transaction")
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
cdef int commit_transaction(OGRDataSourceH ogr_dataset) except 1:
    """Commit the current transaction on a dataset.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset

    Returns
    -------
    int
        0 on success

    Raises
    ------
    DataSourceError
        if GDAL reports OGRERR_FAILURE
    """
    cdef int status = GDALDatasetCommitTransaction(ogr_dataset)

    if status != OGRERR_FAILURE:
        return 0

    raise DataSourceError("Failed to commit transaction")
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
# Not currently used; uncomment when used
|
|
108
|
-
# cdef int rollback_transaction(OGRDataSourceH ogr_dataset) except 1:
|
|
109
|
-
# cdef int err = GDALDatasetRollbackTransaction(ogr_dataset)
|
|
110
|
-
# if err == OGRERR_FAILURE:
|
|
111
|
-
# raise DataSourceError("Failed to rollback transaction")
|
|
112
|
-
|
|
113
|
-
# return 0
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
cdef char** dict_to_options(object values):
    """Build a GDAL name / value string list (char**) from a Python dict.

    Parameters
    ----------
    values : dict or None
        every key and every value must be a str; each is encoded as UTF-8

    Returns
    -------
    char**
        NULL if ``values`` is None or empty, otherwise the list built up by
        CSLAddNameValue().
        NOTE(review): the caller presumably owns the result and must release
        it with CSLDestroy() -- confirm at each call site.
    """
    cdef char **name_values = NULL

    if values is None:
        return NULL

    for name, value in values.items():
        name_b = name.encode('UTF-8')
        value_b = value.encode('UTF-8')
        name_values = CSLAddNameValue(
            name_values, <const char *>name_b, <const char *>value_b
        )

    return name_values
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
cdef const char* override_threadlocal_config_option(str key, str value):
    """Set a thread-local GDAL config option and hand back the old value.

    Parameters
    ----------
    key : str
        name of the config option
    value : str
        new value for the config option

    Returns
    -------
    const char*
        copy of the value that was set for ``key`` before this call, or NULL
        if there was none.  The caller restores it later and is responsible
        for releasing it with CPLFree() when it is not NULL.
    """
    cdef const char *key_c
    cdef const char *value_c
    cdef const char *previous

    # keep the encoded bytes objects alive for as long as the C pointers are used
    key_b = key.encode("UTF-8")
    key_c = key_b
    value_b = value.encode("UTF-8")
    value_c = value_b

    previous = CPLGetThreadLocalConfigOption(key_c, NULL)
    if previous != NULL:
        # the returned string may be replaced by the set call below; GDAL
        # instructs us to keep our own copy
        previous = CPLStrdup(previous)

    CPLSetThreadLocalConfigOption(key_c, value_c)

    return previous
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL:
    """Open an existing OGR data source as a vector dataset.

    Parameters
    ----------
    path_c : char *
        input path, including an in-memory path (/vsimem/...)
    mode : int
        1 opens the data source for update; any other value opens it
        read-only
    options : char **, optional
        dataset open options

    Returns
    -------
    void*
        pointer to the opened dataset

    Raises
    ------
    DataSourceError
        if the dataset could not be opened
    """
    cdef void* dataset = NULL

    # Force linear approximations in all cases
    OGRSetNonLinearGeometriesEnabledFlag(0)

    access_flag = GDAL_OF_UPDATE if mode == 1 else GDAL_OF_READONLY
    flags = GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR | access_flag

    try:
        # WARNING: GDAL logs warnings about invalid open options to stderr
        # instead of raising an error
        dataset = exc_wrap_pointer(
            GDALOpenEx(path_c, flags, NULL, <const char *const *>options, NULL)
        )

    except NullPointerError:
        path = path_c.decode("utf-8")
        raise DataSourceError(
            f"Failed to open dataset (mode={mode}): {path}"
        ) from None

    except CPLE_BaseError as exc:
        message = str(exc)
        if not message.endswith("a supported file format."):
            raise DataSourceError(message) from None

        # GDAL could not pick a driver; tell the user how to name one
        raise DataSourceError(
            f"{message} It might help to specify the correct driver explicitly by "
            "prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'."
        ) from None

    return dataset
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
cdef ogr_close(GDALDatasetH ogr_dataset):
    """Close the dataset, surfacing any error GDAL reports while closing.

    Does nothing when ``ogr_dataset`` is NULL.

    NOTE: some drivers only raise errors on write when calling GDALClose()
    """
    if ogr_dataset == NULL:
        return

    IF CTE_GDAL_VERSION >= (3, 7, 0):
        # for this GDAL version range the return code of GDALClose() is used
        # to decide whether an error needs to be checked
        if GDALClose(ogr_dataset) != CE_None:
            return exc_check()

        return

    ELSE:
        GDALClose(ogr_dataset)

        # GDAL will set an error if there was an error writing the data source
        # on close
        return exc_check()
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL:
    """Open OGR layer by index or name.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset
    layer : str or int
        name or index of layer

    Returns
    -------
    pointer to OGR layer

    Raises
    ------
    ValueError
        if ``layer`` is neither a str nor an int
    DataLayerError
        if the layer could not be opened
    """
    cdef OGRLayerH ogr_layer = NULL

    try:
        if isinstance(layer, str):
            name_b = layer.encode('utf-8')
            name_c = name_b
            ogr_layer = exc_wrap_pointer(GDALDatasetGetLayerByName(ogr_dataset, name_c))

        elif isinstance(layer, int):
            ogr_layer = exc_wrap_pointer(GDALDatasetGetLayer(ogr_dataset, layer))

        else:
            # Without this branch ogr_layer would stay NULL and either be
            # dereferenced below or returned as NULL with no exception set.
            raise ValueError(
                f"'layer' parameter must be a str or int, got {type(layer)}"
            )

    # GDAL does not always raise exception messages in this case
    except NullPointerError:
        raise DataLayerError(f"Layer '{layer}' could not be opened") from None

    except CPLE_BaseError as exc:
        raise DataLayerError(str(exc)) from None

    # if the driver is OSM, we need to execute SQL to set the layer to read in
    # order to read it properly
    if get_driver(ogr_dataset) == "OSM":
        # Note: this returns NULL and does not need to be freed via
        # GDALDatasetReleaseResultSet()
        layer_name = get_string(OGR_L_GetName(ogr_layer))
        sql_b = f"SET interest_layers = {layer_name}".encode('utf-8')
        sql_c = sql_b

        GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL)

    return ogr_layer
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
cdef OGRLayerH execute_sql(GDALDatasetH ogr_dataset, str sql, str sql_dialect=None) except NULL:
    """Run an SQL statement against a dataset and return the result layer.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset
    sql : str
        The sql statement to execute
    sql_dialect : str, optional (default: None)
        The sql dialect the sql statement is written in; when None, NULL is
        passed to GDAL as the dialect

    Returns
    -------
    pointer to OGR layer

    Raises
    ------
    DataLayerError
        if the statement could not be executed
    """
    cdef const char *dialect_c = NULL

    try:
        sql_b = sql.encode('utf-8')
        sql_c = sql_b

        if sql_dialect is not None:
            # keep the bytes object alive while GDAL reads the pointer
            dialect_b = sql_dialect.encode('utf-8')
            dialect_c = dialect_b

        return exc_wrap_pointer(
            GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, dialect_c)
        )

    # GDAL does not always raise exception messages in this case
    except NullPointerError:
        raise DataLayerError(f"Error executing sql '{sql}'") from None

    except CPLE_BaseError as exc:
        raise DataLayerError(str(exc))
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
cdef str get_crs(OGRLayerH ogr_layer):
    """Read the CRS of a layer as EPSG:<code> when possible, otherwise as WKT.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer

    Returns
    -------
    str or None
        EPSG:<code> or WKT; None when the layer has no spatial reference

    Raises
    ------
    CRSError
        if GDAL reports an error reading the spatial reference, or the CRS
        could not be exported as WKT
    """
    cdef void *ogr_crs = NULL
    cdef const char *authority_name = NULL
    cdef const char *authority_code = NULL
    cdef char *wkt_c = NULL

    try:
        ogr_crs = exc_wrap_pointer(OGR_L_GetSpatialRef(ogr_layer))

    except NullPointerError:
        # No coordinate system defined.
        # This is expected and valid for nonspatial tables.
        return None

    except CPLE_BaseError as exc:
        raise CRSError(str(exc))

    # If CRS can be decoded to an EPSG code, use that.
    # The following pointers will be NULL if it cannot be decoded.
    OSRAutoIdentifyEPSG(ogr_crs)
    authority_name = <const char *>OSRGetAuthorityName(ogr_crs, NULL)
    authority_code = <const char *>OSRGetAuthorityCode(ogr_crs, NULL)

    if authority_name != NULL and authority_code != NULL:
        if get_string(authority_name) == 'EPSG':
            code = get_string(authority_code)
            return f"EPSG:{code}"

    # fall back to the WKT representation; the buffer is always released
    try:
        OSRExportToWkt(ogr_crs, &wkt_c)
        if wkt_c == NULL:
            raise CRSError("CRS could not be extracted as WKT") from None

        return get_string(wkt_c)

    finally:
        CPLFree(wkt_c)
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
cdef get_driver(OGRDataSourceH ogr_dataset):
    """Return the name of the driver that opened a dataset.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset

    Returns
    -------
    str
        driver name, decoded as UTF-8

    Raises
    ------
    DataLayerError
        if the driver of the dataset could not be determined
    """
    cdef void *driver_handle

    try:
        driver_handle = exc_wrap_pointer(GDALGetDatasetDriver(ogr_dataset))

    except NullPointerError:
        raise DataLayerError("Could not detect driver of dataset") from None

    except CPLE_BaseError as exc:
        raise DataLayerError(str(exc))

    return OGR_Dr_GetName(driver_handle).decode("UTF-8")
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
cdef get_feature_count(OGRLayerH ogr_layer, int force):
    """Return the number of features in a layer.

    When ``force`` is set and GDAL reports an unknown count (-1), every
    feature of the layer is read once in order to count them.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    force : bool
        True if the feature count should be computed even if it is expensive

    Returns
    -------
    int
        count of features (may be -1 when ``force`` is not set)

    Raises
    ------
    ValueError
        if iterating fails because GDAL could not prepare the SQL query
    DataLayerError
        if iterating over the features fails for any other GDAL error
    """
    cdef OGRFeatureH feature = NULL
    cdef int count = OGR_L_GetFeatureCount(ogr_layer, force)

    if not force or count != -1:
        return count

    # GDAL refused to give us the feature count, so we have to loop over all
    # features ourselves.  This can happen for some drivers (e.g., OSM) or if
    # a where clause is invalid but not rejected as error.

    # make sure layer is read from beginning
    OGR_L_ResetReading(ogr_layer)

    count = 0
    while True:
        try:
            feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))
            count += 1

        except NullPointerError:
            # No more rows available, so stop reading
            break

        # driver may raise other errors, e.g., for OSM if node ids are not
        # increasing, the default config option OSM_USE_CUSTOM_INDEXING=YES
        # causes errors iterating over features
        except CPLE_BaseError as exc:
            message = str(exc)

            # if an invalid where clause is used for a GPKG file, it is not
            # caught as an error until attempting to iterate over features;
            # catch it here
            if "failed to prepare SQL" in message:
                raise ValueError(f"Invalid SQL query: {message}") from None

            raise DataLayerError(
                f"Could not iterate over features: {message}"
            ) from None

        finally:
            # each feature handed out by the layer is released here
            if feature != NULL:
                OGR_F_Destroy(feature)
                feature = NULL

    return count
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
cdef get_total_bounds(OGRLayerH ogr_layer, int force):
    """Return the extent of a layer.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    force : bool
        True if the total bounds should be computed even if it is expensive

    Returns
    -------
    tuple of (xmin, ymin, xmax, ymax) or None
        The total bounds of the layer, or None if GDAL raised an error while
        determining them.
    """
    cdef OGREnvelope envelope

    try:
        exc_wrap_ogrerr(OGR_L_GetExtent(ogr_layer, &envelope, force))

    except CPLE_BaseError:
        return None

    return (envelope.MinX, envelope.MinY, envelope.MaxX, envelope.MaxY)
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
cdef set_metadata(GDALMajorObjectH obj, object metadata):
    """Write metadata to a dataset or layer.

    Parameters
    ----------
    obj : pointer to dataset or layer
    metadata : dict, optional (default None)
        keys and values must be strings; nothing is written when the
        conversion to a GDAL string list yields NULL

    Raises
    ------
    RuntimeError
        if GDALSetMetadata() returns a non-zero error code
    """
    cdef int status = 0
    cdef char **items = dict_to_options(metadata)

    if items == NULL:
        return

    # only default namespace is currently supported
    status = GDALSetMetadata(obj, items, NULL)

    # the string list is released before the status is inspected
    CSLDestroy(items)
    items = NULL

    if status:
        raise RuntimeError("Could not set metadata") from None
|
|
506
|
-
|
|
507
|
-
cdef get_metadata(GDALMajorObjectH obj):
    """Read metadata from a dataset or layer.

    Parameters
    ----------
    obj : pointer to dataset or layer

    Returns
    -------
    dict or None
        metadata as key, value pairs, or None when GDAL returns no metadata
        list for the object
    """
    cdef int i
    # only default namespace is currently supported
    cdef char **items = GDALGetMetadata(obj, NULL)

    if items == NULL:
        return None

    # every entry is split on the first '=' only, so values may contain '='
    entries = [
        items[i].decode('UTF-8').split('=', 1)
        for i in range(CSLCount(items))
    ]

    return dict(entries)
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
cdef detect_encoding(OGRDataSourceH ogr_dataset, OGRLayerH ogr_layer):
    """Work out which encoding to use to read/write string values.

    Returns UTF-8 when the layer advertises the OLCStringsAsUTF8 capability
    or the driver is one of OSM, XLSX, ODS or GeoJSONSeq.  For ESRI Shapefile
    without UTF-8 support, returns ISO-8859-1.  Otherwise the system locale
    preferred encoding is returned.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset
    ogr_layer : pointer to open OGR layer

    Returns
    -------
    str
    """
    # OGR_L_TestCapability returns True for OLCStringsAsUTF8 if GDAL hides
    # encoding complexities for this layer/driver type. In this case all string
    # attribute values have to be supplied in UTF-8 and values will be returned
    # in UTF-8. The encoding used to read/write under the hood depends on the
    # driver used. For layers/drivers where False is returned, the string
    # values are written and read without recoding. Hence, it is up to you to
    # supply the data in the appropriate encoding. More info:
    # https://gdal.org/development/rfc/rfc23_ogr_unicode.html#oftstring-oftstringlist-fields
    # NOTE: for shapefiles, this always returns False for the layer returned
    # when executing SQL, even when it supports UTF-8 (patched below);
    # this may be fixed by https://github.com/OSGeo/gdal/pull/9649 (GDAL >=3.9.0?)
    if OGR_L_TestCapability(ogr_layer, OLCStringsAsUTF8):
        return "UTF-8"

    driver_name = get_driver(ogr_dataset)

    if driver_name == "ESRI Shapefile":
        # OGR_L_TestCapability returns True for OLCStringsAsUTF8 (above) for
        # shapefiles when a .cpg file is present with a valid encoding, or GDAL
        # auto-detects the encoding from the code page of the .dbf file, or
        # SHAPE_ENCODING config option is set, or ENCODING layer creation
        # option is specified (shapefiles only). Otherwise, we can only assume
        # that shapefiles are in their default encoding of ISO-8859-1 (which
        # may be incorrect and must be overridden by user-provided encoding)

        # Always use the first layer to test capabilities until detection for
        # SQL results from shapefiles are fixed (above)
        # This block should only be used for unfixed versions of GDAL (<3.9.0?)
        first_layer_is_utf8 = OGR_L_TestCapability(
            GDALDatasetGetLayer(ogr_dataset, 0), OLCStringsAsUTF8
        )
        return "UTF-8" if first_layer_is_utf8 else "ISO-8859-1"

    # Drivers that are treated as UTF-8 regardless of the capability flag:
    # - OSM: always UTF-8, per
    #   https://help.openstreetmap.org/questions/2172/what-encoding-does-openstreetmap-use
    # - XLSX / ODS: TestCapability for OLCStringsAsUTF8 was False for new files
    #   being created for GDAL < 3.8.5. Once these versions of GDAL are no
    #   longer supported, this can be removed.
    # - GeoJSONSeq: in old gdal versions, OLCStringsAsUTF8 wasn't advertised yet.
    if driver_name == "OSM" or driver_name in ("XLSX", "ODS") or driver_name == "GeoJSONSeq":
        return "UTF-8"

    return locale.getpreferredencoding()
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
cdef get_fields(OGRLayerH ogr_layer, str encoding, use_arrow=False):
    """Collect index, OGR type, name and numpy type of the fields of a layer.

    Fields whose OGR type has no numpy type are skipped with a logged warning,
    unless ``use_arrow`` is set.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    encoding : str
        encoding to use when reading field name
    use_arrow : bool, default False
        If using arrow, all types are supported, and we don't have to
        raise warnings

    Returns
    -------
    ndarray(n, 4)
        array of index, ogr type, name, numpy type

    Raises
    ------
    DataLayerError
        if the layer definition could not be read
    FieldError
        if a field definition could not be read
    """
    cdef int i
    cdef int n_fields
    cdef OGRFeatureDefnH layer_defn = NULL
    cdef OGRFieldDefnH field_defn = NULL
    cdef int field_subtype

    try:
        layer_defn = exc_wrap_pointer(OGR_L_GetLayerDefn(ogr_layer))

    except NullPointerError:
        raise DataLayerError("Could not get layer definition") from None

    except CPLE_BaseError as exc:
        raise DataLayerError(str(exc))

    n_fields = OGR_FD_GetFieldCount(layer_defn)

    # one row per field; rows of skipped fields keep their initial None values
    fields = np.empty(shape=(n_fields, 4), dtype=object)
    any_skipped = False

    for i in range(n_fields):
        try:
            field_defn = exc_wrap_pointer(OGR_FD_GetFieldDefn(layer_defn, i))

        except NullPointerError:
            raise FieldError(f"Could not get field definition for field at index {i}") from None

        except CPLE_BaseError as exc:
            raise FieldError(str(exc))

        field_name = get_string(OGR_Fld_GetNameRef(field_defn), encoding=encoding)
        field_type = OGR_Fld_GetType(field_defn)

        np_type = FIELD_TYPES[field_type]
        if not (np_type or use_arrow):
            any_skipped = True
            log.warning(
                f"Skipping field {field_name}: unsupported OGR type: {field_type}")
            continue

        # a recognised subtype (bool, int16, float32) replaces the base dtype
        field_subtype = OGR_Fld_GetSubType(field_defn)
        subtype_name = FIELD_SUBTYPES.get(field_subtype)
        if subtype_name is not None:
            np_type = subtype_name

        fields[i, 0] = i
        fields[i, 1] = field_type
        fields[i, 2] = field_name
        fields[i, 3] = np_type

    if not any_skipped:
        return fields

    # drop the rows that were never filled in
    keep = np.array([idx is not None for idx in fields[:, 0]])
    return fields[keep]
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
cdef apply_where_filter(OGRLayerH ogr_layer, str where):
    """Applies where filter to layer.

    WARNING: GDAL does not raise an error for GPKG when SQL query is invalid
    but instead only logs to stderr.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    where : str
        See http://ogdi.sourceforge.net/prop/6.2.CapabilitiesMetadata.html
        restricted_where for more information about valid expressions.

    Raises
    ------
    ValueError: if SQL query is not valid
    """

    where_b = where.encode('utf-8')
    where_c = where_b
    err = OGR_L_SetAttributeFilter(ogr_layer, where_c)

    # WARNING: GDAL does not raise this error for GPKG but instead only
    # logs to stderr
    if err != OGRERR_NONE:
        try:
            exc_check()
        except CPLE_BaseError as exc:
            # prefer the message GDAL recorded for the failure
            raise ValueError(str(exc)) from None

        # OGR_L_GetName() returns a C string; decode it so the message shows
        # the layer name itself rather than the repr of a bytes object
        layer_name = get_string(OGR_L_GetName(ogr_layer))
        raise ValueError(f"Invalid SQL query for layer '{layer_name}': '{where}'")
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
cdef apply_bbox_filter(OGRLayerH ogr_layer, bbox):
    """Restrict a layer to features within a bounding box.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    bbox : list or tuple of xmin, ymin, xmax, ymax

    Raises
    ------
    ValueError: if bbox is not a list or tuple or does not have proper number of
        items
    """
    if not isinstance(bbox, (tuple, list)) or len(bbox) != 4:
        raise ValueError(f"Invalid bbox: {bbox}")

    OGR_L_SetSpatialFilterRect(ogr_layer, bbox[0], bbox[1], bbox[2], bbox[3])
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
cdef apply_geometry_filter(OGRLayerH ogr_layer, wkb):
    """Restrict a layer to features selected by a mask geometry.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    wkb : WKB encoding of geometry

    Raises
    ------
    GeometryError
        if the geometry could not be created from ``wkb``
    """
    cdef OGRGeometryH mask_geometry = NULL
    cdef unsigned char *wkb_c = wkb

    status = OGR_G_CreateFromWkb(wkb_c, NULL, &mask_geometry, len(wkb))

    if not status:
        # the geometry created here is destroyed right after it has been
        # handed to the layer as spatial filter
        OGR_L_SetSpatialFilter(ogr_layer, mask_geometry)
        OGR_G_DestroyGeometry(mask_geometry)
        return

    # creation failed; release whatever was allocated before raising
    if mask_geometry != NULL:
        OGR_G_DestroyGeometry(mask_geometry)

    raise GeometryError("Could not create mask geometry") from None
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
cdef validate_feature_range(OGRLayerH ogr_layer, int skip_features=0, int max_features=0):
    """Clamp skip_features and max_features to what the layer can provide.

    This is typically performed after applying where and spatial filters, which
    reduce the available range of features.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    skip_features : number of features to skip from beginning of available range
    max_features : maximum number of features to read from available range;
        0 stands for all features remaining after ``skip_features``

    Returns
    -------
    tuple of (skip_features, num_features)
    """
    available = get_feature_count(ogr_layer, 1)

    if available == 0:
        return 0, 0

    if skip_features >= available:
        # everything is skipped; max_features is passed through unchanged
        return available, max_features

    if max_features == 0:
        return skip_features, available - skip_features

    if max_features > available:
        return skip_features, available

    return skip_features, max_features
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)  # Deactivate negative indexing.
cdef process_geometry(OGRFeatureH ogr_feature, int i, geom_view, uint8_t force_2d):
    """Store the WKB of a feature's geometry in ``geom_view[i]``.

    Parameters
    ----------
    ogr_feature : pointer to the OGR feature being read
    i : index into ``geom_view`` that receives the result
    geom_view : indexable output container; receives ``bytes`` or ``None``
    force_2d : if nonzero, the Z dimension is dropped from 3D geometries

    Notes
    -----
    M values are always removed.  ``OGR_G_SetMeasured`` / ``OGR_G_Set3D`` are
    applied to the geometry reference obtained from the feature, i.e. the
    feature's own geometry is modified.

    Raises
    ------
    MemoryError
        If the WKB buffer cannot be allocated.
    """
    cdef OGRGeometryH ogr_geometry = NULL
    # geometry created by OGR_G_GetLinearGeometry; owned by this function
    cdef OGRGeometryH linear_geometry = NULL
    cdef OGRwkbGeometryType ogr_geometry_type
    cdef unsigned char *wkb = NULL
    cdef int ret_length

    ogr_geometry = OGR_F_GetGeometryRef(ogr_feature)

    if ogr_geometry == NULL:
        geom_view[i] = None
        return

    try:
        ogr_geometry_type = OGR_G_GetGeometryType(ogr_geometry)

        # if geometry has M values, these need to be removed first
        if OGR_G_IsMeasured(ogr_geometry):
            OGR_G_SetMeasured(ogr_geometry, 0)

        if force_2d and OGR_G_Is3D(ogr_geometry):
            OGR_G_Set3D(ogr_geometry, 0)

        # if non-linear (e.g., curve), force to linear type; the result is a
        # new geometry that must be destroyed here (the original reference
        # still belongs to the feature)
        if OGR_GT_IsNonLinear(ogr_geometry_type):
            linear_geometry = OGR_G_GetLinearGeometry(ogr_geometry, 0, NULL)
            ogr_geometry = linear_geometry

        ret_length = OGR_G_WkbSize(ogr_geometry)
        wkb = <unsigned char*>malloc(sizeof(unsigned char)*ret_length)

        # malloc(0) may legitimately return NULL, so only treat NULL as an
        # allocation failure when bytes were actually requested
        if wkb == NULL and ret_length > 0:
            raise MemoryError("could not allocate buffer for geometry WKB")

        # second argument is the byte order (1 == wkbNDR in the OGR API)
        OGR_G_ExportToWkb(ogr_geometry, 1, wkb)
        geom_view[i] = wkb[:ret_length]

    finally:
        free(wkb)

        if linear_geometry != NULL:
            OGR_G_DestroyGeometry(linear_geometry)
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)  # Deactivate negative indexing.
cdef process_fields(
    OGRFeatureH ogr_feature,
    int i,
    int n_fields,
    object field_data,
    object field_data_view,
    object field_indexes,
    object field_ogr_types,
    encoding,
    bint datetime_as_string
):
    """Copy the attribute values of one feature into row ``i`` of each column.

    Parameters
    ----------
    ogr_feature : pointer to the OGR feature being read
    i : row index to write in every column
    n_fields : number of fields to process
    field_data : list of output arrays, one per field; an entry is replaced
        by a float64 copy when a null has to be stored in a boolean or
        integer column
    field_data_view : list of views of ``field_data`` that are written to;
        kept in sync when an entry of ``field_data`` is replaced
    field_indexes : OGR field index for each output column
    field_ogr_types : OGR field type for each output column
    encoding : encoding passed to ``get_string`` for string values
    datetime_as_string : if True, date / datetime values are stored as the
        string returned by OGR instead of being parsed
    """
    cdef int j
    cdef int success
    cdef int field_index
    cdef int ret_length
    cdef GByte *bin_value
    cdef int year = 0
    cdef int month = 0
    cdef int day = 0
    cdef int hour = 0
    cdef int minute = 0
    cdef float fsecond = 0.0
    cdef int timezone = 0

    for j in range(n_fields):
        field_index = field_indexes[j]
        field_type = field_ogr_types[j]
        data = field_data_view[j]

        isnull = OGR_F_IsFieldSetAndNotNull(ogr_feature, field_index) == 0
        if isnull:
            if field_type in (OFTInteger, OFTInteger64, OFTReal):
                # if a boolean or integer type, have to cast to float to hold
                # NaN values
                if data.dtype.kind in ('b', 'i', 'u'):
                    field_data[j] = field_data[j].astype(np.float64)
                    field_data_view[j] = field_data[j][:]
                    field_data_view[j][i] = np.nan
                else:
                    data[i] = np.nan

            elif field_type in (OFTDate, OFTDateTime) and not datetime_as_string:
                data[i] = np.datetime64('NaT')

            else:
                data[i] = None

            continue

        if field_type == OFTInteger:
            data[i] = OGR_F_GetFieldAsInteger(ogr_feature, field_index)

        elif field_type == OFTInteger64:
            data[i] = OGR_F_GetFieldAsInteger64(ogr_feature, field_index)

        elif field_type == OFTReal:
            data[i] = OGR_F_GetFieldAsDouble(ogr_feature, field_index)

        elif field_type == OFTString:
            value = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
            data[i] = value

        elif field_type == OFTBinary:
            bin_value = OGR_F_GetFieldAsBinary(ogr_feature, field_index, &ret_length)
            data[i] = bin_value[:ret_length]

        elif field_type == OFTDateTime or field_type == OFTDate:

            if datetime_as_string:
                # defer datetime parsing to user/ pandas layer
                # Update to OGR_F_GetFieldAsISO8601DateTime when GDAL 3.7+ only
                data[i] = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
            else:
                success = OGR_F_GetFieldAsDateTimeEx(
                    ogr_feature, field_index, &year, &month, &day, &hour, &minute, &fsecond, &timezone)

                ms, ss = math.modf(fsecond)
                second = int(ss)
                # fsecond has millisecond accuracy
                microsecond = round(ms * 1000) * 1000

                if not success:
                    data[i] = np.datetime64('NaT')

                elif field_type == OFTDate:
                    data[i] = datetime.date(year, month, day).isoformat()

                elif field_type == OFTDateTime:
                    if microsecond >= 1000000:
                        # the fractional part rounded up to a whole second
                        # (e.g. 59.9996 s); datetime only accepts microsecond
                        # values up to 999999, so carry into the next second
                        value = datetime.datetime(
                            year, month, day, hour, minute, second
                        ) + datetime.timedelta(seconds=1)
                    else:
                        value = datetime.datetime(
                            year, month, day, hour, minute, second, microsecond
                        )

                    data[i] = value.isoformat()
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)  # Deactivate negative indexing.
cdef get_features(
    OGRLayerH ogr_layer,
    object[:,:] fields,
    encoding,
    uint8_t read_geometry,
    uint8_t force_2d,
    int skip_features,
    int num_features,
    uint8_t return_fids,
    bint datetime_as_string
):
    """Read features sequentially from the layer into numpy arrays.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    fields : 2D object array; column 0 holds the OGR field index, column 1
        the OGR field type and column 3 the dtype used for the output array
    encoding : encoding used to decode string values
    read_geometry : if nonzero, geometries are read as WKB
    force_2d : forwarded to ``process_geometry``
    skip_features : number of features to skip before reading
    num_features : number of rows allocated in the output arrays
    return_fids : if nonzero, feature ids are collected
    datetime_as_string : if True, datetime columns are allocated as object

    Returns
    -------
    tuple of (fid_data, geometries, field_data); the first two are None when
    not requested.  Arrays are truncated to the number of features read.

    Raises
    ------
    FeatureError
        If GDAL raises an error while fetching a feature, or returns more
        features than were allocated.
    """
    cdef OGRFeatureH ogr_feature = NULL
    cdef int n_fields
    cdef int i
    cdef int field_index

    # make sure layer is read from beginning, then jump over skipped features
    OGR_L_ResetReading(ogr_layer)
    if skip_features > 0:
        OGR_L_SetNextByIndex(ogr_layer, skip_features)

    fid_data = None
    if return_fids:
        fid_data = np.empty(shape=(num_features), dtype=np.int64)
        fid_view = fid_data[:]

    geometries = None
    if read_geometry:
        geometries = np.empty(shape=(num_features, ), dtype='object')
        geom_view = geometries[:]

    n_fields = fields.shape[0]
    field_indexes = fields[:,0]
    field_ogr_types = fields[:,1]

    # one output array per field; datetime columns hold strings (object)
    # when parsing is deferred
    field_data = []
    for field_index in range(n_fields):
        column_dtype = fields[field_index,3]
        if datetime_as_string and column_dtype.startswith("datetime"):
            column_dtype = "object"
        field_data.append(np.empty(shape=(num_features, ), dtype=column_dtype))

    field_data_view = [column[:] for column in field_data]

    if num_features == 0:
        return fid_data, geometries, field_data

    i = 0
    while True:
        try:
            # stop once the requested number of rows has been filled
            if num_features > 0 and i == num_features:
                break

            try:
                ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))
            except NullPointerError:
                # No more rows available, so stop reading
                break
            except CPLE_BaseError as exc:
                raise FeatureError(str(exc))

            if i >= num_features:
                raise FeatureError(
                    "GDAL returned more records than expected based on the count of "
                    "records that may meet your combination of filters against this "
                    "dataset.  Please open an issue on Github "
                    "(https://github.com/geopandas/pyogrio/issues) to report encountering "
                    "this error."
                ) from None

            if return_fids:
                fid_view[i] = OGR_F_GetFID(ogr_feature)

            if read_geometry:
                process_geometry(ogr_feature, i, geom_view, force_2d)

            process_fields(
                ogr_feature, i, n_fields, field_data, field_data_view,
                field_indexes, field_ogr_types, encoding, datetime_as_string
            )
            i += 1

        finally:
            # every fetched feature is destroyed, also when an error is raised
            if ogr_feature != NULL:
                OGR_F_Destroy(ogr_feature)
                ogr_feature = NULL

    # There may be fewer rows available than expected from OGR_L_GetFeatureCount,
    # such as features with bounding boxes that intersect the bbox
    # but do not themselves intersect the bbox.
    # Empty rows are dropped.
    if i < num_features:
        if return_fids:
            fid_data = fid_data[:i]
        if read_geometry:
            geometries = geometries[:i]
        field_data = [column[:i] for column in field_data]

    return fid_data, geometries, field_data
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)  # Deactivate negative indexing.
cdef get_features_by_fid(
    OGRLayerH ogr_layer,
    int[:] fids,
    object[:,:] fields,
    encoding,
    uint8_t read_geometry,
    uint8_t force_2d,
    bint datetime_as_string
):
    """Read the features with the given feature ids, in the order given.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    fids : feature ids to read
    fields : 2D object array; column 0 holds the OGR field index, column 1
        the OGR field type and column 3 the dtype used for the output array
    encoding : encoding used to decode string values
    read_geometry : if nonzero, geometries are read as WKB
    force_2d : forwarded to ``process_geometry``
    datetime_as_string : if True, datetime columns are allocated as object

    Returns
    -------
    tuple of (geometries, field_data); geometries is None when not requested

    Raises
    ------
    FeatureError
        If a feature cannot be read for one of the ids.
    """
    cdef OGRFeatureH ogr_feature = NULL
    cdef int n_fields
    cdef int i
    cdef int fid
    cdef int field_index
    cdef int count = len(fids)

    # make sure layer is read from beginning
    OGR_L_ResetReading(ogr_layer)

    geometries = None
    if read_geometry:
        geometries = np.empty(shape=(count, ), dtype='object')
        geom_view = geometries[:]

    n_fields = fields.shape[0]
    field_indexes = fields[:,0]
    field_ogr_types = fields[:,1]

    # one output array per field; datetime columns hold strings (object)
    # when parsing is deferred
    field_data = []
    for field_index in range(n_fields):
        column_dtype = fields[field_index,3]
        if datetime_as_string and column_dtype.startswith("datetime"):
            column_dtype = "object"
        field_data.append(np.empty(shape=(count, ), dtype=column_dtype))

    field_data_view = [column[:] for column in field_data]

    for i in range(count):
        try:
            fid = fids[i]

            try:
                ogr_feature = exc_wrap_pointer(OGR_L_GetFeature(ogr_layer, fid))
            except NullPointerError:
                raise FeatureError(f"Could not read feature with fid {fid}") from None
            except CPLE_BaseError as exc:
                raise FeatureError(str(exc))

            if read_geometry:
                process_geometry(ogr_feature, i, geom_view, force_2d)

            process_fields(
                ogr_feature, i, n_fields, field_data, field_data_view,
                field_indexes, field_ogr_types, encoding, datetime_as_string
            )

        finally:
            # every fetched feature is destroyed, also when an error is raised
            if ogr_feature != NULL:
                OGR_F_Destroy(ogr_feature)
                ogr_feature = NULL

    return (geometries, field_data)
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)  # Deactivate negative indexing.
cdef get_bounds(
    OGRLayerH ogr_layer,
    int skip_features,
    int num_features):
    """Read the feature id and geometry envelope of each feature.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    skip_features : number of features to skip before reading
    num_features : number of entries allocated in the output arrays

    Returns
    -------
    tuple of (fid_data, bounds_data); bounds_data has shape (4, n) with rows
    MinX, MinY, MaxX, MaxY and NaN for features without a geometry.  Both
    arrays are truncated to the number of features read.

    Raises
    ------
    FeatureError
        If GDAL raises an error while fetching a feature, or returns more
        features than were allocated.
    """
    cdef OGRFeatureH ogr_feature = NULL
    cdef OGRGeometryH ogr_geometry = NULL
    cdef OGREnvelope ogr_envelope
    cdef int i

    # make sure layer is read from beginning, then jump over skipped features
    OGR_L_ResetReading(ogr_layer)
    if skip_features > 0:
        OGR_L_SetNextByIndex(ogr_layer, skip_features)

    fid_data = np.empty(shape=(num_features), dtype=np.int64)
    fid_view = fid_data[:]

    bounds_data = np.empty(shape=(4, num_features), dtype='float64')
    bounds_view = bounds_data[:]

    i = 0
    while True:
        try:
            # stop once the requested number of entries has been filled
            if num_features > 0 and i == num_features:
                break

            try:
                ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))
            except NullPointerError:
                # No more rows available, so stop reading
                break
            except CPLE_BaseError as exc:
                raise FeatureError(str(exc))

            if i >= num_features:
                raise FeatureError(
                    "Reading more features than indicated by OGR_L_GetFeatureCount is not supported"
                ) from None

            fid_view[i] = OGR_F_GetFID(ogr_feature)

            ogr_geometry = OGR_F_GetGeometryRef(ogr_feature)

            if ogr_geometry != NULL:
                OGR_G_GetEnvelope(ogr_geometry, &ogr_envelope)
                bounds_view[:, i] = (
                    ogr_envelope.MinX,
                    ogr_envelope.MinY,
                    ogr_envelope.MaxX,
                    ogr_envelope.MaxY,
                )
            else:
                # features without a geometry have no envelope
                bounds_view[:, i] = np.nan

            i += 1

        finally:
            # every fetched feature is destroyed, also when an error is raised
            if ogr_feature != NULL:
                OGR_F_Destroy(ogr_feature)
                ogr_feature = NULL

    # Less rows read than anticipated, so drop empty rows
    if i < num_features:
        fid_data = fid_data[:i]
        bounds_data = bounds_data[:, :i]

    return fid_data, bounds_data
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
def ogr_read(
    object path_or_buffer,
    object dataset_kwargs,
    object layer=None,
    object encoding=None,
    int read_geometry=True,
    int force_2d=False,
    object columns=None,
    int skip_features=0,
    int max_features=0,
    object where=None,
    tuple bbox=None,
    object mask=None,
    object fids=None,
    str sql=None,
    str sql_dialect=None,
    int return_fids=False,
    bint datetime_as_string=False
):
    """Read a layer (or the result of an SQL statement) into numpy arrays.

    Parameters
    ----------
    path_or_buffer : path of the data source, or ``bytes`` holding its
        content (bytes are first written to a temporary in-memory file)
    dataset_kwargs : dict of dataset open options
    layer : layer to read; the default layer is used if None
    encoding : encoding of the data source; detected if not provided
    read_geometry : if nonzero, geometries are read as WKB
    force_2d : if nonzero, the Z dimension is dropped from geometries
    columns : names of the fields to read; all fields if None
    skip_features : number of features to skip from the start
    max_features : maximum number of features to read (0 means no limit)
    where : attribute filter expression
    bbox : bounding box used as spatial filter
    mask : WKB geometry used as spatial filter
    fids : feature ids to read; cannot be combined with the filters,
        ``sql``, ``skip_features`` or ``max_features``
    sql : SQL statement to execute; cannot be combined with ``layer``
    sql_dialect : dialect passed along with ``sql``
    return_fids : if nonzero, feature ids are returned
    datetime_as_string : if True, datetime values are returned as strings

    Returns
    -------
    tuple of (meta, fid_data, geometries, field_data) where meta is a dict
    with keys 'crs', 'encoding', 'fields', 'dtypes' and 'geometry_type'

    Raises
    ------
    ValueError
        If the combination or the values of the parameters are not valid.
    """
    cdef char **dataset_options = NULL
    cdef const char *field_c = NULL
    cdef char **fields_c = NULL
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef const char *prev_shape_encoding = NULL
    cdef bint override_shape_encoding = False

    if fids is not None:
        if where is not None or bbox is not None or mask is not None or sql is not None or skip_features or max_features:
            raise ValueError(
                "cannot set both 'fids' and any of 'where', 'bbox', 'mask', "
                "'sql', 'skip_features' or 'max_features'"
            )
        fids = np.asarray(fids, dtype=np.intc)

    if sql is not None and layer is not None:
        raise ValueError("'sql' paramater cannot be combined with 'layer'")

    if not (read_geometry or return_fids or columns is None or len(columns) > 0):
        raise ValueError(
            "at least one of read_geometry or return_fids must be True or columns must "
            "be None or non-empty"
        )

    if bbox and mask:
        raise ValueError("cannot set both 'bbox' and 'mask'")

    if skip_features < 0:
        raise ValueError("'skip_features' must be >= 0")

    if max_features < 0:
        raise ValueError("'max_features' must be >= 0")

    # bound before the try block so that the cleanup below never hits an
    # unbound name (which would hide the original error) when creating the
    # in-memory file fails
    path = None

    try:
        path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem_buffer(path_or_buffer) else path_or_buffer

        if encoding:
            # for shapefiles, SHAPE_ENCODING must be set before opening the file
            # to prevent automatic decoding to UTF-8 by GDAL, so we save previous
            # SHAPE_ENCODING so that it can be restored later
            # (we do this for all data sources where encoding is set because
            # we don't know the driver until after it is opened, which is too late)
            override_shape_encoding = True
            prev_shape_encoding = override_threadlocal_config_option("SHAPE_ENCODING", encoding)

        dataset_options = dict_to_options(dataset_kwargs)
        ogr_dataset = ogr_open(path.encode('UTF-8'), 0, dataset_options)

        if sql is None:
            if layer is None:
                layer = get_default_layer(ogr_dataset)
            ogr_layer = get_ogr_layer(ogr_dataset, layer)
        else:
            ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect)

        crs = get_crs(ogr_layer)

        # Encoding is derived from the user, from the dataset capabilities / type,
        # or from the system locale
        if encoding:
            if get_driver(ogr_dataset) == "ESRI Shapefile":
                # NOTE: SHAPE_ENCODING is a configuration option whereas ENCODING is the dataset open option
                if "ENCODING" in dataset_kwargs:
                    raise ValueError('cannot provide both encoding parameter and "ENCODING" option; use encoding parameter to specify correct encoding for data source')

                # Because SHAPE_ENCODING is set above, GDAL will automatically
                # decode shapefiles to UTF-8; ignore any encoding set by user
                encoding = "UTF-8"

        else:
            encoding = detect_encoding(ogr_dataset, ogr_layer)

        fields = get_fields(ogr_layer, encoding)

        ignored_fields = []
        if columns is not None:
            # identify ignored fields first
            ignored_fields = list(set(fields[:,2]) - set(columns))

            # Fields are matched exactly by name, duplicates are dropped.
            # Find index of each field into fields
            idx = np.intersect1d(fields[:,2], columns, return_indices=True)[1]
            fields = fields[idx, :]

        if not read_geometry:
            ignored_fields.append("OGR_GEOMETRY")

        # Instruct GDAL to ignore reading fields not
        # included in output columns for faster I/O
        if ignored_fields:
            for field in ignored_fields:
                field_b = field.encode("utf-8")
                field_c = field_b
                fields_c = CSLAddString(fields_c, field_c)

            OGR_L_SetIgnoredFields(ogr_layer, <const char**>fields_c)

        geometry_type = get_geometry_type(ogr_layer)

        if fids is not None:
            geometries, field_data = get_features_by_fid(
                ogr_layer,
                fids,
                fields,
                encoding,
                read_geometry=read_geometry and geometry_type is not None,
                force_2d=force_2d,
                datetime_as_string=datetime_as_string
            )

            # bypass reading fids since these should match fids used for read
            if return_fids:
                fid_data = fids.astype(np.int64)
            else:
                fid_data = None
        else:
            # Apply the attribute filter
            if where is not None and where != "":
                apply_where_filter(ogr_layer, where)

            # Apply the spatial filter
            if bbox is not None:
                apply_bbox_filter(ogr_layer, bbox)

            elif mask is not None:
                apply_geometry_filter(ogr_layer, mask)

            # Limit feature range to available range
            skip_features, num_features = validate_feature_range(
                ogr_layer, skip_features, max_features
            )

            fid_data, geometries, field_data = get_features(
                ogr_layer,
                fields,
                encoding,
                read_geometry=read_geometry and geometry_type is not None,
                force_2d=force_2d,
                skip_features=skip_features,
                num_features=num_features,
                return_fids=return_fids,
                datetime_as_string=datetime_as_string
            )

        meta = {
            'crs': crs,
            'encoding': encoding,
            'fields': fields[:,2], # return only names
            'dtypes':fields[:,3],
            'geometry_type': geometry_type,
        }

    finally:
        # release the list of ignored field names built with CSLAddString
        if fields_c != NULL:
            CSLDestroy(fields_c)
            fields_c = NULL

        if dataset_options != NULL:
            CSLDestroy(dataset_options)
            dataset_options = NULL

        if ogr_dataset != NULL:
            if sql is not None:
                GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer)

            GDALClose(ogr_dataset)
            ogr_dataset = NULL

        # reset SHAPE_ENCODING config parameter if temporarily set above
        if override_shape_encoding:
            CPLSetThreadLocalConfigOption("SHAPE_ENCODING", prev_shape_encoding)

            if prev_shape_encoding != NULL:
                CPLFree(<void*>prev_shape_encoding)
                prev_shape_encoding = NULL

        # only remove the in-memory file if it was actually created
        if isinstance(path_or_buffer, bytes) and path is not None:
            delete_vsimem_file(path)

    return (
        meta,
        fid_data,
        geometries,
        field_data
    )


cdef bint is_vsimem_buffer(object path_or_buffer):
    """Return True if the input is a bytes buffer that needs an in-memory file."""
    return isinstance(path_or_buffer, bytes)
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
cdef void pycapsule_array_stream_deleter(object stream_capsule) noexcept:
    """Capsule destructor: release the wrapped ArrowArrayStream and free it."""
    cdef ArrowArrayStream* c_stream

    c_stream = <ArrowArrayStream*>PyCapsule_GetPointer(
        stream_capsule, 'arrow_array_stream'
    )

    # Do not invoke the deleter on a used/moved capsule: in that case the
    # release callback is NULL
    if c_stream.release != NULL:
        c_stream.release(c_stream)

    # the struct itself was allocated with malloc and is always freed
    free(c_stream)
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
cdef object alloc_c_stream(ArrowArrayStream** c_stream):
    """Allocate an ArrowArrayStream struct and wrap it in a PyCapsule.

    Parameters
    ----------
    c_stream : address of the pointer that receives the allocated struct

    Returns
    -------
    PyCapsule named 'arrow_array_stream' whose destructor is
    ``pycapsule_array_stream_deleter``

    Raises
    ------
    MemoryError
        If the struct cannot be allocated.
    """
    c_stream[0] = <ArrowArrayStream*> malloc(sizeof(ArrowArrayStream))

    # malloc can fail; writing the release member below would then
    # dereference a NULL pointer
    if c_stream[0] == NULL:
        raise MemoryError("could not allocate ArrowArrayStream")

    # Ensure the capsule destructor doesn't call a random release pointer
    c_stream[0].release = NULL
    return PyCapsule_New(c_stream[0], 'arrow_array_stream', &pycapsule_array_stream_deleter)
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
class _ArrowStream:
    """Holder that hands out a stream capsule through ``__arrow_c_stream__``."""

    def __init__(self, capsule):
        # object returned as-is by __arrow_c_stream__
        self._capsule = capsule

    def __arrow_c_stream__(self, requested_schema=None):
        """Return the wrapped capsule.

        Raises
        ------
        NotImplementedError
            If ``requested_schema`` is anything other than None.
        """
        if requested_schema is None:
            return self._capsule

        raise NotImplementedError("requested_schema is not supported")
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
@contextlib.contextmanager
def ogr_open_arrow(
    object path_or_buffer,
    dataset_kwargs,
    object layer=None,
    object encoding=None,
    int read_geometry=True,
    int force_2d=False,
    object columns=None,
    int skip_features=0,
    int max_features=0,
    object where=None,
    tuple bbox=None,
    object mask=None,
    object fids=None,
    str sql=None,
    str sql_dialect=None,
    int return_fids=False,
    int batch_size=0,
    use_pyarrow=False,
):
    """Context manager that opens a layer as an Arrow stream.

    Yields a tuple ``(meta, reader)``.  ``meta`` is a dict with keys 'crs',
    'encoding', 'fields', 'geometry_type', 'geometry_name' and 'fid_column'.
    ``reader`` is a pyarrow ``RecordBatchStreamReader`` if ``use_pyarrow`` is
    true, otherwise an ``_ArrowStream`` wrapping the stream capsule.  The
    dataset is closed when the context exits, so the stream has to be
    consumed inside the ``with`` block.

    Parameters
    ----------
    path_or_buffer : path of the data source, or ``bytes`` holding its
        content (bytes are first written to a temporary in-memory file)
    dataset_kwargs : dict of dataset open options
    layer : layer to read; the default layer is used if None
    encoding : encoding of the data source; detected if not provided.
        Only UTF-8 is accepted, except for the "ESRI Shapefile" driver.
    read_geometry : if zero, the geometry column is ignored
    force_2d : not supported for Arrow; must be falsy
    columns : names of the fields to read; all fields if None
    skip_features : number of features to skip (needs GDAL >= 3.8.0)
    max_features : not supported for Arrow; must be 0
    where : attribute filter expression
    bbox : bounding box used as spatial filter
    mask : WKB geometry used as spatial filter
    fids : feature ids to read; converted into a where clause
    sql : SQL statement to execute; cannot be combined with ``layer``
    sql_dialect : dialect passed along with ``sql``
    return_fids : if zero, the FID column is excluded from the stream
    batch_size : if > 0, passed as MAX_FEATURES_IN_BATCH stream option
    use_pyarrow : if true, pyarrow is imported and used for the reader

    Raises
    ------
    ValueError
        If the combination or the values of the parameters are not valid.
    RuntimeError
        If GDAL is too old for Arrow support, or the stream cannot be opened.
    """

    cdef int err = 0
    cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
    cdef const char *path_c = NULL
    cdef char **dataset_options = NULL
    cdef const char *where_c = NULL
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef void *ogr_driver = NULL
    cdef char **fields_c = NULL
    cdef const char *field_c = NULL
    cdef char **options = NULL
    cdef const char *prev_shape_encoding = NULL
    cdef bint override_shape_encoding = False
    cdef ArrowArrayStream* stream
    cdef ArrowSchema schema

    # compile-time check: the branch only exists in builds against old GDAL
    IF CTE_GDAL_VERSION < (3, 6, 0):
        raise RuntimeError("Need GDAL>=3.6 for Arrow support")

    if force_2d:
        raise ValueError("forcing 2D is not supported for Arrow")

    if fids is not None:
        if where is not None or bbox is not None or mask is not None or sql is not None or skip_features or max_features:
            raise ValueError(
                "cannot set both 'fids' and any of 'where', 'bbox', 'mask', "
                "'sql', 'skip_features', or 'max_features'"
            )

    IF CTE_GDAL_VERSION < (3, 8, 0):
        if skip_features:
            raise ValueError(
                "specifying 'skip_features' is not supported for Arrow for GDAL<3.8.0"
            )

    if skip_features < 0:
        raise ValueError("'skip_features' must be >= 0")

    if max_features:
        raise ValueError(
            "specifying 'max_features' is not supported for Arrow"
        )

    if sql is not None and layer is not None:
        raise ValueError("'sql' paramater cannot be combined with 'layer'")

    if not (read_geometry or return_fids or columns is None or len(columns) > 0):
        raise ValueError(
            "at least one of read_geometry or return_fids must be True or columns must "
            "be None or non-empty"
        )

    if bbox and mask:
        raise ValueError("cannot set both 'bbox' and 'mask'")

    # stays None until the stream is opened; checked in the cleanup below
    reader = None
    try:
        path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer

        if encoding:
            # SHAPE_ENCODING is overridden before opening the dataset; the
            # previous value is kept so that it can be restored on exit
            override_shape_encoding = True
            prev_shape_encoding = override_threadlocal_config_option("SHAPE_ENCODING", encoding)

        dataset_options = dict_to_options(dataset_kwargs)
        ogr_dataset = ogr_open(path.encode('UTF-8'), 0, dataset_options)

        if sql is None:
            if layer is None:
                layer = get_default_layer(ogr_dataset)
            ogr_layer = get_ogr_layer(ogr_dataset, layer)
        else:
            ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect)

        crs = get_crs(ogr_layer)

        # Encoding is derived from the user, from the dataset capabilities / type,
        # or from the system locale
        if encoding:
            if get_driver(ogr_dataset) == "ESRI Shapefile":
                if "ENCODING" in dataset_kwargs:
                    raise ValueError('cannot provide both encoding parameter and "ENCODING" option; use encoding parameter to specify correct encoding for data source')

                encoding = "UTF-8"

            # accepts spellings such as "utf-8", "UTF8" or "utf8"
            elif encoding.replace('-','').upper() != 'UTF8':
                raise ValueError("non-UTF-8 encoding is not supported for Arrow; use the non-Arrow interface instead")

        else:
            encoding = detect_encoding(ogr_dataset, ogr_layer)

        fields = get_fields(ogr_layer, encoding, use_arrow=True)

        ignored_fields = []
        if columns is not None:
            # Fields are matched exactly by name, duplicates are dropped.
            ignored_fields = list(set(fields[:,2]) - set(columns))
        if not read_geometry:
            ignored_fields.append("OGR_GEOMETRY")

        # raise error if schema has bool values for FGB / GPKG and GDAL <3.8.3
        # due to https://github.com/OSGeo/gdal/issues/8998
        IF CTE_GDAL_VERSION < (3, 8, 3):

            driver = get_driver(ogr_dataset)
            if driver in {'FlatGeobuf', 'GPKG'}:
                ignored = set(ignored_fields)
                for f in fields:
                    if f[2] not in ignored and f[3] == 'bool':
                        raise RuntimeError(
                            "GDAL < 3.8.3 does not correctly read boolean data values using the "
                            "Arrow API.  Do not use read_arrow() / use_arrow=True for this dataset."
                        )

        geometry_type = get_geometry_type(ogr_layer)

        geometry_name = get_string(OGR_L_GetGeometryColumn(ogr_layer))

        fid_column = get_string(OGR_L_GetFIDColumn(ogr_layer))
        fid_column_where = fid_column
        # OGR_L_GetFIDColumn returns the column name if it is a custom column,
        # or "" if not. For arrow, the default column name used to return the FID data
        # read is "OGC_FID". When accessing the underlying datasource like when using a
        # where clause, the default column name is "FID".
        if fid_column == "":
            fid_column = "OGC_FID"
            fid_column_where = "FID"

        # Use fids list to create a where clause, as arrow doesn't support direct fid
        # filtering.
        if fids is not None:
            IF CTE_GDAL_VERSION < (3, 8, 0):
                driver = get_driver(ogr_dataset)
                if driver not in {"GPKG", "GeoJSON"}:
                    warnings.warn(
                        "Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow "
                        "for some drivers. Upgrading GDAL or using 'use_arrow=False' "
                        "can avoid this.",
                        stacklevel=2,
                    )

            fids_str = ",".join([str(fid) for fid in fids])
            where = f"{fid_column_where} IN ({fids_str})"

        # Apply the attribute filter
        if where is not None and where != "":
            try:
                apply_where_filter(ogr_layer, where)
            except ValueError as ex:
                if fids is not None and str(ex).startswith("Invalid SQL query"):
                    # If fids is not None, the where being applied is the one formatted
                    # above.
                    raise ValueError(
                        f"error applying filter for {len(fids)} fids; max. number for "
                        f"drivers with default SQL dialect 'OGRSQL' is 4997"
                    ) from ex

                raise

        # Apply the spatial filter
        if bbox is not None:
            apply_bbox_filter(ogr_layer, bbox)

        elif mask is not None:
            apply_geometry_filter(ogr_layer, mask)

        # Limit to specified columns
        if ignored_fields:
            for field in ignored_fields:
                field_b = field.encode("utf-8")
                field_c = field_b
                fields_c = CSLAddString(fields_c, field_c)

            OGR_L_SetIgnoredFields(ogr_layer, <const char**>fields_c)

        if not return_fids:
            options = CSLSetNameValue(options, "INCLUDE_FID", "NO")

        if batch_size > 0:
            options = CSLSetNameValue(
                options,
                "MAX_FEATURES_IN_BATCH",
                str(batch_size).encode('UTF-8')
            )

        # Default to geoarrow metadata encoding
        IF CTE_GDAL_VERSION >= (3, 8, 0):
            options = CSLSetNameValue(
                options,
                "GEOMETRY_METADATA_ENCODING",
                "GEOARROW".encode('UTF-8')
            )

        # make sure layer is read from beginning
        OGR_L_ResetReading(ogr_layer)

        # allocate the stream struct and wrap in capsule to ensure clean-up on error
        capsule = alloc_c_stream(&stream)

        if not OGR_L_GetArrowStream(ogr_layer, stream, options):
            raise RuntimeError("Failed to open ArrowArrayStream from Layer")

        if skip_features:
            # only supported for GDAL >= 3.8.0; have to do this after getting
            # the Arrow stream
            OGR_L_SetNextByIndex(ogr_layer, skip_features)

        if use_pyarrow:
            # imported here so that pyarrow is only needed when requested
            import pyarrow as pa

            reader = pa.RecordBatchStreamReader._import_from_c(<uintptr_t> stream)
        else:
            reader = _ArrowStream(capsule)

        meta = {
            'crs': crs,
            'encoding': encoding,
            'fields': fields[:,2], # return only names
            'geometry_type': geometry_type,
            'geometry_name': geometry_name,
            'fid_column': fid_column,
        }

        # stream has to be consumed before the Dataset is closed
        yield meta, reader

    finally:
        if use_pyarrow and reader is not None:
            # Mark reader as closed to prevent reading batches
            reader.close()

        # `stream` will be freed through `capsule` destructor

        CSLDestroy(options)
        if fields_c != NULL:
            CSLDestroy(fields_c)
            fields_c = NULL

        if dataset_options != NULL:
            CSLDestroy(dataset_options)
            dataset_options = NULL

        if ogr_dataset != NULL:
            if sql is not None:
                GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer)

            GDALClose(ogr_dataset)
            ogr_dataset = NULL

        # reset SHAPE_ENCODING config parameter if temporarily set above
        if override_shape_encoding:
            CPLSetThreadLocalConfigOption("SHAPE_ENCODING", prev_shape_encoding)

            if prev_shape_encoding != NULL:
                CPLFree(<void*>prev_shape_encoding)
                prev_shape_encoding = NULL

        if is_vsimem:
            delete_vsimem_file(path)
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
def ogr_read_bounds(
    object path_or_buffer,
    object layer=None,
    object encoding=None,
    int read_geometry=True,
    int force_2d=False,
    object columns=None,
    int skip_features=0,
    int max_features=0,
    object where=None,
    tuple bbox=None,
    object mask=None):
    """Read the bounds of the features in a layer.

    Parameters
    ----------
    path_or_buffer : str or bytes
        Dataset path passed to ``ogr_open``. A ``bytes`` value is first staged
        via ``read_buffer_to_vsimem`` and the staged file is deleted on exit.
    layer : optional
        Layer to read; when None, ``get_default_layer`` picks one.
    encoding, read_geometry, force_2d, columns
        Accepted for signature compatibility; not used by this function.
    skip_features : int
        Must be >= 0; forwarded to ``validate_feature_range``.
    max_features : int
        Must be >= 0; forwarded to ``validate_feature_range``.
    where : str, optional
        Attribute filter; ignored when None or empty.
    bbox : tuple, optional
        Spatial filter applied through ``apply_bbox_filter``.
    mask : optional
        Spatial filter applied through ``apply_geometry_filter``; cannot be
        combined with ``bbox``.

    Returns
    -------
    The value returned by ``get_bounds`` for the validated feature range.

    Raises
    ------
    ValueError
        If both ``bbox`` and ``mask`` are set, or if ``skip_features`` or
        ``max_features`` is negative.
    """
    cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL

    if bbox and mask:
        raise ValueError("cannot set both 'bbox' and 'mask'")

    if skip_features < 0:
        raise ValueError("'skip_features' must be >= 0")

    if max_features < 0:
        raise ValueError("'max_features' must be >= 0")

    # Bind `path` before the try block so the cleanup below cannot hit an
    # unbound local (and mask the real error) if staging the buffer fails.
    path = None

    try:
        path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer
        ogr_dataset = ogr_open(path.encode('UTF-8'), 0, NULL)

        if layer is None:
            layer = get_default_layer(ogr_dataset)

        ogr_layer = get_ogr_layer(ogr_dataset, layer)

        # Apply the attribute filter
        if where is not None and where != "":
            apply_where_filter(ogr_layer, where)

        # Apply the spatial filter
        if bbox is not None:
            apply_bbox_filter(ogr_layer, bbox)

        elif mask is not None:
            apply_geometry_filter(ogr_layer, mask)

        # Limit feature range to available range
        skip_features, num_features = validate_feature_range(ogr_layer, skip_features, max_features)

        bounds = get_bounds(ogr_layer, skip_features, num_features)

    finally:
        if ogr_dataset != NULL:
            GDALClose(ogr_dataset)
            ogr_dataset = NULL

        # only delete the in-memory file if it was actually staged
        if is_vsimem and path is not None:
            delete_vsimem_file(path)

    return bounds
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
def ogr_read_info(
    object path_or_buffer,
    dataset_kwargs,
    object layer=None,
    object encoding=None,
    int force_feature_count=False,
    int force_total_bounds=False):
    """Collect descriptive metadata about a layer of a dataset.

    Parameters
    ----------
    path_or_buffer : str or bytes
        Dataset path passed to ``ogr_open``. A ``bytes`` value is first staged
        via ``read_buffer_to_vsimem`` and the staged file is deleted on exit.
    dataset_kwargs : dict
        Converted with ``dict_to_options`` and passed to ``ogr_open`` as
        dataset open options.
    layer : optional
        Layer to describe; when None, ``get_default_layer`` picks one.
    encoding : str, optional
        When set, the thread-local ``SHAPE_ENCODING`` config option is
        overridden for the duration of the call and restored afterwards.
        When not set, the encoding comes from ``detect_encoding``.
    force_feature_count : int
        Forwarded to ``get_feature_count``.
    force_total_bounds : int
        Forwarded to ``get_total_bounds``.

    Returns
    -------
    dict
        Keys: layer_name, crs, encoding, fields, dtypes, fid_column,
        geometry_name, geometry_type, features, total_bounds, driver,
        capabilities, layer_metadata, dataset_metadata.
    """
    cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
    cdef char **dataset_options = NULL
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef const char *prev_shape_encoding = NULL
    cdef bint override_shape_encoding = False

    # Bind `path` before the try block so the cleanup below cannot hit an
    # unbound local (and mask the real error) if staging the buffer fails.
    path = None

    try:
        path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer

        if encoding:
            override_shape_encoding = True
            prev_shape_encoding = override_threadlocal_config_option("SHAPE_ENCODING", encoding)

        dataset_options = dict_to_options(dataset_kwargs)
        ogr_dataset = ogr_open(path.encode('UTF-8'), 0, dataset_options)

        if layer is None:
            layer = get_default_layer(ogr_dataset)
        ogr_layer = get_ogr_layer(ogr_dataset, layer)

        # NOTE(review): presumably GDAL already recodes shapefile text to
        # UTF-8 once SHAPE_ENCODING is overridden, hence the forced "UTF-8"
        # for decoding here -- confirm against the reader functions.
        if encoding and get_driver(ogr_dataset) == "ESRI Shapefile":
            encoding = "UTF-8"
        else:
            encoding = encoding or detect_encoding(ogr_dataset, ogr_layer)

        fields = get_fields(ogr_layer, encoding)

        meta = {
            "layer_name": get_string(OGR_L_GetName(ogr_layer)),
            "crs": get_crs(ogr_layer),
            "encoding": encoding,
            "fields": fields[:,2],  # return only names
            "dtypes": fields[:,3],
            "fid_column": get_string(OGR_L_GetFIDColumn(ogr_layer)),
            "geometry_name": get_string(OGR_L_GetGeometryColumn(ogr_layer)),
            "geometry_type": get_geometry_type(ogr_layer),
            "features": get_feature_count(ogr_layer, force_feature_count),
            "total_bounds": get_total_bounds(ogr_layer, force_total_bounds),
            "driver": get_driver(ogr_dataset),
            "capabilities": {
                "random_read": OGR_L_TestCapability(ogr_layer, OLCRandomRead) == 1,
                "fast_set_next_by_index": OGR_L_TestCapability(ogr_layer, OLCFastSetNextByIndex) == 1,
                "fast_spatial_filter": OGR_L_TestCapability(ogr_layer, OLCFastSpatialFilter) == 1,
                "fast_feature_count": OGR_L_TestCapability(ogr_layer, OLCFastFeatureCount) == 1,
                "fast_total_bounds": OGR_L_TestCapability(ogr_layer, OLCFastGetExtent) == 1,
            },
            "layer_metadata": get_metadata(ogr_layer),
            "dataset_metadata": get_metadata(ogr_dataset),
        }

    finally:
        if dataset_options != NULL:
            CSLDestroy(dataset_options)
            dataset_options = NULL

        if ogr_dataset != NULL:
            GDALClose(ogr_dataset)
            ogr_dataset = NULL

        # reset SHAPE_ENCODING config parameter if temporarily set above
        if override_shape_encoding:
            CPLSetThreadLocalConfigOption("SHAPE_ENCODING", prev_shape_encoding)

            if prev_shape_encoding != NULL:
                CPLFree(<void*>prev_shape_encoding)
                # do not keep a dangling pointer to the freed buffer
                prev_shape_encoding = NULL

        # only delete the in-memory file if it was actually staged
        if is_vsimem and path is not None:
            delete_vsimem_file(path)

    return meta
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
def ogr_list_layers(object path_or_buffer):
    """List the layers of a dataset.

    Parameters
    ----------
    path_or_buffer : str or bytes
        Dataset path passed to ``ogr_open``. A ``bytes`` value is first staged
        via ``read_buffer_to_vsimem`` and the staged file is deleted on exit.

    Returns
    -------
    The array produced by ``get_layer_names`` for the opened dataset.
    """
    cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
    cdef OGRDataSourceH ogr_dataset = NULL

    # Bind `path` before the try block so the cleanup below cannot hit an
    # unbound local (and mask the real error) if staging the buffer fails.
    path = None

    try:
        path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer
        ogr_dataset = ogr_open(path.encode('UTF-8'), 0, NULL)
        layers = get_layer_names(ogr_dataset)

    finally:
        if ogr_dataset != NULL:
            GDALClose(ogr_dataset)
            ogr_dataset = NULL

        # only delete the in-memory file if it was actually staged
        if is_vsimem and path is not None:
            delete_vsimem_file(path)

    return layers
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
cdef str get_default_layer(OGRDataSourceH ogr_dataset):
    """Return the name of the layer that is read when none is requested.

    The first layer reported by ``get_layer_names`` is the default. When the
    dataset holds more than one layer, a warning listing the default and all
    remaining layer names is emitted so callers can choose explicitly.

    The dataset is not closed here; that remains the caller's job.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset

    Returns
    -------
    str
        name of the first layer in the dataset.
    """
    layers = get_layer_names(ogr_dataset)
    first_layer_name = layers[0][0]

    # single-layer datasets need no warning
    if len(layers) <= 1:
        return first_layer_name

    dataset_name = os.path.basename(get_string(OGR_DS_GetName(ogr_dataset)))

    # list comprehension (not a generator) on purpose: this is a cdef function
    quoted_names = [f"'{l}'" for l in layers[1:, 0]]
    other_layer_names = ', '.join(quoted_names)

    warnings.warn(
        f"More than one layer found in '{dataset_name}': '{first_layer_name}' "
        f"(default), {other_layer_names}. Specify layer parameter to avoid this "
        "warning.",
        stacklevel=2,
    )

    return first_layer_name
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
cdef get_layer_names(OGRDataSourceH ogr_dataset):
    """Get the name and geometry type of every layer in the dataset.

    The caller is responsible for closing the dataset.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset

    Returns
    -------
    ndarray of shape (n, 2), dtype object
        One row per layer: column 0 holds the layer name, column 1 the value
        returned by ``get_geometry_type`` for that layer.
    """
    cdef OGRLayerH ogr_layer = NULL

    layer_count = GDALDatasetGetLayerCount(ogr_dataset)

    data = np.empty(shape=(layer_count, 2), dtype=object)
    for i in range(layer_count):
        ogr_layer = GDALDatasetGetLayer(ogr_dataset, i)

        data[i, 0] = get_string(OGR_L_GetName(ogr_layer))
        data[i, 1] = get_geometry_type(ogr_layer)

    return data
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
# NOTE: all modes are write-only
|
|
1919
|
-
# some data sources have multiple layers
|
|
1920
|
-
cdef void * ogr_create(const char* path_c, const char* driver_c, char** options) except NULL:
    """Create a new dataset at ``path_c`` using the driver named ``driver_c``.

    Parameters
    ----------
    path_c : const char*
        UTF-8 encoded path of the dataset to create.
    driver_c : const char*
        UTF-8 encoded GDAL driver name.
    options : char**
        Dataset creation options passed to ``GDALCreate`` (may be NULL).

    Returns
    -------
    pointer to the created dataset; the caller owns it and must close it.

    Raises
    ------
    DataSourceError
        If the driver cannot be obtained or the dataset cannot be created.
    """
    cdef void *ogr_driver = NULL
    cdef OGRDataSourceH ogr_dataset = NULL

    # Get the driver
    try:
        ogr_driver = exc_wrap_pointer(GDALGetDriverByName(driver_c))

    except NullPointerError:
        raise DataSourceError(f"Could not obtain driver: {driver_c.decode('utf-8')} (check that it was installed correctly into GDAL)") from None

    except CPLE_BaseError as exc:
        # keep the GDAL error as the explicit cause
        raise DataSourceError(str(exc)) from exc

    # Create the dataset
    try:
        ogr_dataset = exc_wrap_pointer(GDALCreate(ogr_driver, path_c, 0, 0, 0, GDT_Unknown, options))

    except NullPointerError:
        raise DataSourceError(f"Failed to create dataset with driver: {path_c.decode('utf-8')} {driver_c.decode('utf-8')}") from None

    except CPLE_NotSupportedError:
        raise DataSourceError(f"Driver {driver_c.decode('utf-8')} does not support write functionality") from None

    except CPLE_BaseError as exc:
        # keep the GDAL error as the explicit cause
        raise DataSourceError(str(exc)) from exc

    return ogr_dataset
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
cdef void * create_crs(str crs) except NULL:
    """Build an OGR spatial reference from a user-supplied CRS string.

    Parameters
    ----------
    crs : str
        CRS definition handed to ``OSRSetFromUserInput``.

    Returns
    -------
    pointer to a new spatial reference; the caller must release it with
    ``OSRRelease``.

    Raises
    ------
    CRSError
        If the CRS string cannot be applied or GDAL reports an error.
    """
    cdef char *crs_c = NULL
    cdef void *ogr_crs = NULL
    cdef int err = 0

    crs_b = crs.encode('UTF-8')
    crs_c = crs_b

    try:
        ogr_crs = exc_wrap_pointer(OSRNewSpatialReference(NULL))
        err = OSRSetFromUserInput(ogr_crs, crs_c)
        if err:
            raise CRSError("Could not set CRS: {}".format(crs_c.decode('UTF-8'))) from None

    except CPLE_BaseError as exc:
        # ogr_crs is still NULL if the allocation itself failed
        if ogr_crs != NULL:
            OSRRelease(ogr_crs)
            ogr_crs = NULL
        raise CRSError("Could not set CRS: {}".format(exc))

    except Exception:
        # Any other failure (including the CRSError raised above) must not
        # leak the spatial reference that was already allocated.
        if ogr_crs != NULL:
            OSRRelease(ogr_crs)
            ogr_crs = NULL
        raise

    return ogr_crs
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
cdef infer_field_types(list dtypes):
    """Translate numpy dtypes into OGR field type descriptors.

    Parameters
    ----------
    dtypes : list
        numpy dtype objects, one per field.

    Returns
    -------
    ndarray of int, shape (len(dtypes), 4)
        Row ``i`` describes ``dtypes[i]``. Column 0 is the OGR field type and
        column 1 the OGR field subtype; column 2 is a width that is only
        filled in for fixed-width string dtypes; column 3 is never written
        here and stays 0.

    Raises
    ------
    NotImplementedError
        If a dtype has no known OGR equivalent.
    """
    cdef int field_type = 0
    cdef int field_subtype = 0

    field_types = np.zeros(shape=(len(dtypes), 4), dtype=int)

    for i, dtype in enumerate(dtypes):
        # direct lookup of known dtype names
        if dtype.name in DTYPE_OGR_FIELD_TYPES:
            field_type, field_subtype = DTYPE_OGR_FIELD_TYPES[dtype.name]
            field_types[i, 0] = field_type
            field_types[i, 1] = field_subtype
            continue

        # Object dtype is ambiguous (text or binary); it is written as a
        # string field, with no width recorded.
        # TODO: handle binary or other types
        if dtype == np.dtype('O'):
            field_types[i, 0] = OFTString
            continue

        # fixed-width numpy strings: record a width as well
        if dtype.type in (np.str_, np.bytes_):
            field_types[i, 0] = OFTString
            # NOTE(review): itemsize // 4 matches np.str_ (4 bytes per
            # character) but looks too small for np.bytes_ -- confirm.
            field_types[i, 2] = int(dtype.itemsize // 4)
            continue

        # datetime64 carries a unit suffix (e.g. [ms]) that is not useful
        # when writing to GDAL, so every unit maps to the same entry
        if dtype.name.startswith("datetime64"):
            field_type, field_subtype = DTYPE_OGR_FIELD_TYPES["datetime64"]
            field_types[i, 0] = field_type
            field_types[i, 1] = field_subtype
            continue

        raise NotImplementedError(f"field type is not supported {dtype.name} (field index: {i})")

    return field_types
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
cdef create_ogr_dataset_layer(
    str path,
    bint is_vsi,
    str layer,
    str driver,
    str crs,
    str geometry_type,
    str encoding,
    object dataset_kwargs,
    object layer_kwargs,
    bint append,
    dataset_metadata,
    layer_metadata,
    OGRDataSourceH* ogr_dataset_out,
    OGRLayerH* ogr_layer_out,
):
    """
    Construct the OGRDataSource and OGRLayer objects based on input
    path and layer.

    If the file already exists, will open the existing dataset and overwrite
    or append the layer (depending on `append`), otherwise will create a new
    dataset.

    Fills in the `ogr_dataset_out` and `ogr_layer_out` pointers passed as
    parameter with initialized objects (or raises an error if it fails to do
    so). It is the responsibility of the caller to clean up those objects
    after use. If this function raises, any dataset it opened or created is
    closed before the error propagates.

    Returns whether a new layer was created or not (when the layer was created,
    the caller still needs to set up the layer definition, i.e. create the
    fields).

    Parameters
    ----------
    path : str
        Path of the dataset to open or create.
    is_vsi : bool
        When True the path is never looked up on the local filesystem and
        the dataset is always created from scratch.
    layer : str
        Layer name; when empty, the file name of `path` without extension.
    driver : str
        GDAL driver name.
    crs : str
        CRS for a newly created layer, or None.
    geometry_type : str
        Passed to `get_geometry_type_code`.
    encoding : str
        Only used if `driver` is "ESRI Shapefile". If not None, it overrules the default
        shapefile encoding, which is "UTF-8" in pyogrio.
    dataset_kwargs : dict
        Dataset creation options (only used when a dataset is created).
    layer_kwargs : dict
        Layer creation options; keys and values must be str.
    append : bool
        Append to an existing layer instead of replacing it.
    dataset_metadata, layer_metadata
        Passed to `set_metadata` for the dataset and the layer.

    Returns
    -------
    bool :
        Whether a new layer was created, or False if we are appending to an
        existing layer.

    Raises
    ------
    ValueError
        If both `encoding` and an "ENCODING" layer option are given for a
        shapefile.
    DataLayerError
        If the layer cannot be created / obtained or metadata cannot be set.
    """
    cdef const char *path_c = NULL
    cdef const char *layer_c = NULL
    cdef const char *driver_c = NULL
    cdef const char *encoding_c = NULL
    cdef char **dataset_options = NULL
    cdef char **layer_options = NULL
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef OGRSpatialReferenceH ogr_crs = NULL
    cdef OGRwkbGeometryType geometry_code
    cdef int layer_idx = -1

    path_b = path.encode('UTF-8')
    path_c = path_b

    driver_b = driver.encode('UTF-8')
    driver_c = driver_b

    # in-memory dataset is always created from scratch
    path_exists = os.path.exists(path) if not is_vsi else False

    if not layer:
        layer = os.path.splitext(os.path.split(path)[1])[0]

    # if shapefile, GeoJSON, or FlatGeobuf, always delete first
    # for other types, check if we can create layers
    # GPKG might be the only multi-layer writeable type.  TODO: check this
    if driver in ('ESRI Shapefile', 'GeoJSON', 'GeoJSONSeq', 'FlatGeobuf') and path_exists:
        if not append:
            os.unlink(path)
            path_exists = False

    layer_exists = False
    if path_exists:
        try:
            ogr_dataset = ogr_open(path_c, 1, NULL)

            for i in range(GDALDatasetGetLayerCount(ogr_dataset)):
                name = OGR_L_GetName(GDALDatasetGetLayer(ogr_dataset, i))
                if layer == name.decode('UTF-8'):
                    layer_idx = i
                    break

            if layer_idx >= 0:
                layer_exists = True

                if not append:
                    GDALDatasetDeleteLayer(ogr_dataset, layer_idx)

        except DataSourceError as exc:
            # open failed
            if append:
                raise exc

            # otherwise create from scratch
            if is_vsi:
                VSIUnlink(path_c)
            else:
                os.unlink(path)

            ogr_dataset = NULL

    # if we are not appending to an existing layer, we need to create
    # the layer and all associated properties (CRS, field defs, etc)
    create_layer = not (append and layer_exists)

    # Everything from here on may allocate GDAL resources; keep it inside one
    # try block so that any failure closes the dataset and frees the CRS and
    # option lists instead of leaking them.
    try:
        # either it didn't exist or could not open it in write mode
        if ogr_dataset == NULL:
            dataset_options = dict_to_options(dataset_kwargs)
            ogr_dataset = ogr_create(path_c, driver_c, dataset_options)

        ### Create the layer
        if create_layer:
            # Create the CRS
            if crs is not None:
                ogr_crs = create_crs(crs)
                # force geographic CRS to use lon, lat order and ignore axis order specified by CRS, in order
                # to correctly write KML and GeoJSON coordinates in correct order
                OSRSetAxisMappingStrategy(ogr_crs, OAMS_TRADITIONAL_GIS_ORDER)

            # Setup other layer creation options
            for k, v in layer_kwargs.items():
                k = k.encode('UTF-8')
                v = v.encode('UTF-8')
                layer_options = CSLAddNameValue(layer_options, <const char *>k, <const char *>v)

            if driver == 'ESRI Shapefile':
                # ENCODING option must be set for shapefiles to properly write *.cpg
                # file containing the encoding; this is not a supported option for
                # other drivers.  This is done after setting general options above
                # to override ENCODING if passed by the user as a layer option.
                if encoding and "ENCODING" in layer_kwargs:
                    raise ValueError('cannot provide both encoding parameter and "ENCODING" layer creation option; use the encoding parameter')

                # always write to UTF-8 if encoding is not set
                encoding = encoding or "UTF-8"
                encoding_b = encoding.upper().encode('UTF-8')
                encoding_c = encoding_b
                layer_options = CSLSetNameValue(layer_options, "ENCODING", encoding_c)

        ### Get geometry type
        # TODO: this is brittle for 3D / ZM / M types
        # TODO: fail on M / ZM types
        geometry_code = get_geometry_type_code(geometry_type)

        try:
            if create_layer:
                layer_b = layer.encode('UTF-8')
                layer_c = layer_b

                ogr_layer = exc_wrap_pointer(
                    GDALDatasetCreateLayer(ogr_dataset, layer_c, ogr_crs,
                                           geometry_code, layer_options))

            else:
                ogr_layer = exc_wrap_pointer(get_ogr_layer(ogr_dataset, layer))

            # Set dataset and layer metadata
            set_metadata(ogr_dataset, dataset_metadata)
            set_metadata(ogr_layer, layer_metadata)

        except Exception as exc:
            # layer-level failures are reported uniformly as DataLayerError
            raise DataLayerError(str(exc))

    except Exception:
        # do not hand a half-initialized dataset back to the caller
        if ogr_dataset != NULL:
            GDALClose(ogr_dataset)
            ogr_dataset = NULL

        raise

    finally:
        if ogr_crs != NULL:
            OSRRelease(ogr_crs)
            ogr_crs = NULL

        if dataset_options != NULL:
            CSLDestroy(dataset_options)
            dataset_options = NULL

        if layer_options != NULL:
            CSLDestroy(layer_options)
            layer_options = NULL

    ogr_dataset_out[0] = ogr_dataset
    ogr_layer_out[0] = ogr_layer

    return create_layer
|
|
2216
|
-
|
|
2217
|
-
|
|
2218
|
-
# TODO: set geometry and field data as memory views?
|
|
2219
|
-
def ogr_write(
    object path_or_fp,
    str layer,
    str driver,
    geometry,
    fields,
    field_data,
    field_mask,
    str crs,
    str geometry_type,
    str encoding,
    object dataset_kwargs,
    object layer_kwargs,
    bint promote_to_multi=False,
    bint nan_as_null=True,
    bint append=False,
    dataset_metadata=None,
    layer_metadata=None,
    gdal_tz_offsets=None
):
    """Write geometries and attribute values to a layer, one feature per record.

    Parameters
    ----------
    path_or_fp : object
        Output target; resolved to a path by ``get_ogr_vsimem_write_path``.
        When the resolved path starts with ``/vsimem/`` the written file is
        copied back into ``path_or_fp`` and the in-memory file is deleted.
    layer : str
        Layer name.
    driver : str
        GDAL driver name.
    geometry : sequence or None
        WKB blobs, one per record (entries may be None). When None, no
        geometry is written and ``geometry_type`` is ignored.
    fields : sequence of str or None
        Field names.
    field_data : sequence of arrays or None
        One array per field; each needs a ``dtype`` and one value per record.
    field_mask : sequence or None
        One entry per field: None, or an array in which truthy entries mark
        values that are written as null.
    crs : str
        CRS for a newly created layer, or None.
    geometry_type : str
        Geometry type for a newly created layer.
    encoding : str
        Text encoding for field names and string values; forced to "UTF-8"
        for the "ESRI Shapefile" driver, otherwise detected when not given.
    dataset_kwargs, layer_kwargs : dict
        Dataset / layer creation options.
    promote_to_multi : bool
        Convert point / line string / polygon geometries to their multi type.
    nan_as_null : bool
        Write NaN values of real-valued fields as null.
    append : bool
        Append to an existing layer instead of replacing it.
    dataset_metadata, layer_metadata
        Metadata set on the dataset and the layer.
    gdal_tz_offsets : dict, optional
        Maps a datetime field name to a per-record array of timezone flags
        passed to ``OGR_F_SetFieldDateTimeEx``; missing fields use 0.

    Raises
    ------
    ValueError
        If the input arrays have inconsistent lengths, nothing is provided
        to write, or a string value cannot be encoded.
    FieldError, FeatureError, GeometryError, DataSourceError
        If the corresponding GDAL operation fails.
    NotImplementedError
        If a field has an OGR type that is not handled here.
    """
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef OGRFeatureH ogr_feature = NULL
    cdef OGRGeometryH ogr_geometry = NULL
    cdef OGRFeatureDefnH ogr_featuredef = NULL
    cdef OGRFieldDefnH ogr_fielddef = NULL
    cdef unsigned char *wkb_buffer = NULL
    cdef int supports_transactions = 0
    cdef int err = 0
    cdef int i = 0
    cdef int num_records = -1
    cdef int num_field_data = len(field_data) if field_data is not None else 0
    cdef int num_fields = len(fields) if fields is not None else 0
    cdef bint is_vsi = False

    if num_fields != num_field_data:
        raise ValueError("field_data array needs to be same length as fields array")

    if num_fields == 0 and geometry is None:
        raise ValueError("You must provide at least a geometry column or a field")

    if num_fields > 0:
        num_records = len(field_data[0])
        for i in range(1, len(field_data)):
            if len(field_data[i]) != num_records:
                raise ValueError("field_data arrays must be same length")

    if geometry is None:
        # If no geometry data, we ignore the geometry_type and don't create a geometry
        # column
        geometry_type = None
    else:
        if num_fields > 0:
            if len(geometry) != num_records:
                raise ValueError(
                    "field_data arrays must be same length as geometry array"
                )
        else:
            num_records = len(geometry)

    if field_mask is not None:
        if len(field_data) != len(field_mask):
            raise ValueError("field_data and field_mask must be same length")
        for i in range(0, len(field_mask)):
            if field_mask[i] is not None and len(field_mask[i]) != num_records:
                raise ValueError("field_mask arrays must be same length as geometry array")
    else:
        field_mask = [None] * num_fields

    if gdal_tz_offsets is None:
        gdal_tz_offsets = {}

    try:
        # Setup in-memory handler if needed
        path = get_ogr_vsimem_write_path(path_or_fp, driver)
        is_vsi = path.startswith('/vsimem/')

        # Setup dataset and layer
        layer_created = create_ogr_dataset_layer(
            path, is_vsi, layer, driver, crs, geometry_type, encoding,
            dataset_kwargs, layer_kwargs, append,
            dataset_metadata, layer_metadata,
            &ogr_dataset, &ogr_layer,
        )

        if driver == 'ESRI Shapefile':
            # force encoding for remaining operations to be in UTF-8 (even if user
            # provides an encoding) because GDAL will automatically convert those to
            # the target encoding because ENCODING is set as a layer creation option
            encoding = "UTF-8"

        else:
            # Now the dataset and layer have been created, we can properly determine the
            # encoding. It is derived from the user, from the dataset capabilities / type,
            # or from the system locale
            encoding = encoding or detect_encoding(ogr_dataset, ogr_layer)

        ### Create the fields
        field_types = None
        if num_fields > 0:
            field_types = infer_field_types([field.dtype for field in field_data])

        if layer_created:
            for i in range(num_fields):
                field_type, field_subtype, width, precision = field_types[i]

                name_b = fields[i].encode(encoding)
                try:
                    ogr_fielddef = exc_wrap_pointer(OGR_Fld_Create(name_b, field_type))

                    # subtypes, see: https://gdal.org/development/rfc/rfc50_ogr_field_subtype.html
                    if field_subtype != OFSTNone:
                        OGR_Fld_SetSubType(ogr_fielddef, field_subtype)

                    if width:
                        OGR_Fld_SetWidth(ogr_fielddef, width)

                    # TODO: set precision

                    exc_wrap_int(OGR_L_CreateField(ogr_layer, ogr_fielddef, 1))

                except Exception:
                    # only wrap real errors; KeyboardInterrupt / SystemExit
                    # must propagate unchanged
                    raise FieldError(f"Error adding field '{fields[i]}' to layer") from None

                finally:
                    if ogr_fielddef != NULL:
                        OGR_Fld_Destroy(ogr_fielddef)
                        ogr_fielddef = NULL

        ### Create the features
        ogr_featuredef = OGR_L_GetLayerDefn(ogr_layer)

        supports_transactions = OGR_L_TestCapability(ogr_layer, OLCTransactions)
        if supports_transactions:
            start_transaction(ogr_dataset, 0)

        for i in range(num_records):
            # create the feature
            ogr_feature = OGR_F_Create(ogr_featuredef)
            if ogr_feature == NULL:
                raise FeatureError(f"Could not create feature at index {i}") from None

            # create the geometry based on specific WKB type (there might be mixed types in geometries)
            # TODO: geometry must not be null or errors
            wkb = None if geometry is None else geometry[i]
            if wkb is not None:
                wkbtype = <int>bytearray(wkb)[1]
                # may need to consider all 4 bytes: int.from_bytes(wkb[0][1:4], byteorder="little")
                # use "little" if the first byte == 1
                ogr_geometry = OGR_G_CreateGeometry(<OGRwkbGeometryType>wkbtype)
                if ogr_geometry == NULL:
                    raise GeometryError(f"Could not create geometry at index {i} for WKB type {wkbtype}") from None

                # import the WKB
                wkb_buffer = wkb
                err = OGR_G_ImportFromWkb(ogr_geometry, wkb_buffer, len(wkb))
                if err:
                    raise GeometryError(f"Could not create geometry from WKB at index {i}") from None

                # Convert to multi type
                if promote_to_multi:
                    if wkbtype in (wkbPoint, wkbPoint25D, wkbPointM, wkbPointZM):
                        ogr_geometry = OGR_G_ForceToMultiPoint(ogr_geometry)
                    elif wkbtype in (wkbLineString, wkbLineString25D, wkbLineStringM, wkbLineStringZM):
                        ogr_geometry = OGR_G_ForceToMultiLineString(ogr_geometry)
                    elif wkbtype in (wkbPolygon, wkbPolygon25D, wkbPolygonM, wkbPolygonZM):
                        ogr_geometry = OGR_G_ForceToMultiPolygon(ogr_geometry)

                # Set the geometry on the feature
                # this assumes ownership of the geometry and its cleanup
                err = OGR_F_SetGeometryDirectly(ogr_feature, ogr_geometry)
                ogr_geometry = NULL  # to prevent cleanup after this point
                if err:
                    raise GeometryError(f"Could not set geometry for feature at index {i}") from None

            # Set field values
            for field_idx in range(num_fields):
                field_value = field_data[field_idx][i]
                field_type = field_types[field_idx][0]

                mask = field_mask[field_idx]
                if mask is not None and mask[i]:
                    OGR_F_SetFieldNull(ogr_feature, field_idx)

                elif field_type == OFTString:
                    if (
                        field_value is None
                        or (isinstance(field_value, float) and isnan(field_value))
                    ):
                        OGR_F_SetFieldNull(ogr_feature, field_idx)

                    else:
                        if not isinstance(field_value, str):
                            field_value = str(field_value)

                        try:
                            value_b = field_value.encode(encoding)
                            OGR_F_SetFieldString(ogr_feature, field_idx, value_b)

                        except AttributeError:
                            raise ValueError(f"Could not encode value '{field_value}' in field '{fields[field_idx]}' to string")

                elif field_type == OFTInteger:
                    OGR_F_SetFieldInteger(ogr_feature, field_idx, field_value)

                elif field_type == OFTInteger64:
                    OGR_F_SetFieldInteger64(ogr_feature, field_idx, field_value)

                elif field_type == OFTReal:
                    if nan_as_null and isnan(field_value):
                        OGR_F_SetFieldNull(ogr_feature, field_idx)
                    else:
                        OGR_F_SetFieldDouble(ogr_feature, field_idx, field_value)

                elif field_type == OFTDate:
                    if np.isnat(field_value):
                        OGR_F_SetFieldNull(ogr_feature, field_idx)
                    else:
                        datetime = field_value.item()
                        OGR_F_SetFieldDateTimeEx(
                            ogr_feature,
                            field_idx,
                            datetime.year,
                            datetime.month,
                            datetime.day,
                            0,
                            0,
                            0.0,
                            0
                        )

                elif field_type == OFTDateTime:
                    if np.isnat(field_value):
                        OGR_F_SetFieldNull(ogr_feature, field_idx)
                    else:
                        # millisecond resolution is what gets written
                        datetime = field_value.astype("datetime64[ms]").item()
                        tz_array = gdal_tz_offsets.get(fields[field_idx], None)
                        if tz_array is None:
                            gdal_tz = 0
                        else:
                            gdal_tz = tz_array[i]
                        OGR_F_SetFieldDateTimeEx(
                            ogr_feature,
                            field_idx,
                            datetime.year,
                            datetime.month,
                            datetime.day,
                            datetime.hour,
                            datetime.minute,
                            datetime.second + datetime.microsecond / 10**6,
                            gdal_tz
                        )

                else:
                    raise NotImplementedError(f"OGR field type is not supported for writing: {field_type}")

            # Add feature to the layer
            try:
                exc_wrap_int(OGR_L_CreateFeature(ogr_layer, ogr_feature))

            except CPLE_BaseError as exc:
                raise FeatureError(f"Could not add feature to layer at index {i}: {exc}") from None

            OGR_F_Destroy(ogr_feature)
            ogr_feature = NULL

        if supports_transactions:
            commit_transaction(ogr_dataset)

        log.info(f"Created {num_records:,} records" )

        # close dataset to force driver to flush data
        exc = ogr_close(ogr_dataset)
        ogr_dataset = NULL
        if exc:
            raise DataSourceError(f"Failed to write features to dataset {path}; {exc}")

        # copy in-memory file back to path_or_fp object
        if is_vsi:
            read_vsimem_to_buffer(path, path_or_fp)

    finally:
        ### Final cleanup
        # make sure that all objects allocated above are released if exceptions
        # are raised, and the dataset is closed
        if ogr_fielddef != NULL:
            OGR_Fld_Destroy(ogr_fielddef)
            ogr_fielddef = NULL

        if ogr_feature != NULL:
            OGR_F_Destroy(ogr_feature)
            ogr_feature = NULL

        if ogr_geometry != NULL:
            OGR_G_DestroyGeometry(ogr_geometry)
            ogr_geometry = NULL

        if ogr_dataset != NULL:
            ogr_close(ogr_dataset)

        if is_vsi:
            delete_vsimem_file(path)
|
|
2528
|
-
|
|
2529
|
-
|
|
2530
|
-
def ogr_write_arrow(
    object path_or_fp,
    str layer,
    str driver,
    object arrow_obj,
    str crs,
    str geometry_type,
    str geometry_name,
    str encoding,
    object dataset_kwargs,
    object layer_kwargs,
    bint append=False,
    dataset_metadata=None,
    layer_metadata=None,
):
    """Write every batch of an Arrow C stream to an OGR layer.

    Parameters
    ----------
    path_or_fp : object
        Destination; resolved to a GDAL path by ``get_ogr_vsimem_write_path``.
        When the resolved path starts with ``/vsimem/``, the in-memory file is
        copied back into this object after writing and then deleted.
    layer, driver, crs, geometry_type, dataset_kwargs, layer_kwargs, append,
    dataset_metadata, layer_metadata
        Forwarded unchanged to ``create_ogr_dataset_layer``.
    arrow_obj : object
        Object exposing ``__arrow_c_stream__()`` returning an
        ``"arrow_array_stream"`` capsule.
    geometry_name : str
        If truthy, passed to GDAL as the ``GEOMETRY_NAME`` option and used to
        skip that column when creating fields.
    encoding : str
        Also forwarded to ``create_ogr_dataset_layer``. Must be empty/None or
        UTF-8 unless ``driver`` is ``'ESRI Shapefile'``.

    Raises
    ------
    RuntimeError
        If GDAL < 3.8, or the Arrow stream / schema / batch cannot be accessed.
    ValueError
        If a non-UTF-8 encoding is requested for a driver other than
        ``'ESRI Shapefile'``.
    DataLayerError
        If GDAL fails to write a batch to the layer.
    DataSourceError
        If closing the dataset reports an error.
    """
    IF CTE_GDAL_VERSION < (3, 8, 0):
        raise RuntimeError("Need GDAL>=3.8 for Arrow write support")

    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef char **options = NULL
    cdef bint is_vsi = False
    cdef ArrowArrayStream* stream = NULL
    cdef ArrowSchema schema
    cdef ArrowArray array

    # mark both structs as "not holding anything" so the cleanup in the
    # finally block can tell whether they were ever populated
    schema.release = NULL
    array.release = NULL

    # only shapefile supports non-UTF encoding because ENCODING option is set
    # during dataset creation and GDAL auto-translates from UTF-8 values to that
    # encoding.
    # This check only depends on the arguments, so it is done before the
    # dataset / layer is created: a rejected call must not create or open the
    # output as a side effect.
    if encoding and encoding.replace('-','').upper() != 'UTF8' and driver != 'ESRI Shapefile':
        raise ValueError("non-UTF-8 encoding is not supported for Arrow; use the non-Arrow interface instead")

    try:
        path = get_ogr_vsimem_write_path(path_or_fp, driver)
        is_vsi = path.startswith('/vsimem/')

        layer_created = create_ogr_dataset_layer(
            path, is_vsi, layer, driver, crs, geometry_type, encoding,
            dataset_kwargs, layer_kwargs, append,
            dataset_metadata, layer_metadata,
            &ogr_dataset, &ogr_layer,
        )

        if geometry_name:
            opts = {"GEOMETRY_NAME": geometry_name}
        else:
            opts = {}

        options = dict_to_options(opts)

        stream_capsule = arrow_obj.__arrow_c_stream__()
        stream = <ArrowArrayStream*>PyCapsule_GetPointer(
            stream_capsule, "arrow_array_stream"
        )

        if stream == NULL:
            raise RuntimeError("Could not extract valid Arrow array stream.")

        if stream.release == NULL:
            raise RuntimeError("Arrow array stream was already released.")

        if stream.get_schema(stream, &schema) != 0:
            raise RuntimeError("Could not get Arrow schema from stream.")

        # fields are only created for a newly created layer
        if layer_created:
            create_fields_from_arrow_schema(ogr_layer, &schema, options, geometry_name)

        while True:
            if stream.get_next(stream, &array) != 0:
                raise RuntimeError("Error while accessing batch from stream.")

            # We've reached the end of the stream
            if array.release == NULL:
                break

            if not OGR_L_WriteArrowBatch(ogr_layer, &schema, &array, options):
                exc = exc_check()
                gdal_msg = f": {str(exc)}" if exc else "."
                raise DataLayerError(
                    f"Error while writing batch to OGR layer{gdal_msg}"
                )

            # release the batch if it still holds data after the write
            if array.release != NULL:
                array.release(&array)

        # close dataset to force driver to flush data
        exc = ogr_close(ogr_dataset)
        ogr_dataset = NULL
        if exc:
            raise DataSourceError(f"Failed to write features to dataset {path}; {exc}")

        # copy in-memory file back to path_or_fp object
        if is_vsi:
            read_vsimem_to_buffer(path, path_or_fp)

    finally:
        # release everything acquired above, also when an exception was raised
        if stream != NULL and stream.release != NULL:
            stream.release(stream)

        if schema.release != NULL:
            schema.release(&schema)

        if array.release != NULL:
            array.release(&array)

        if options != NULL:
            CSLDestroy(options)
            options = NULL

        if ogr_dataset != NULL:
            ogr_close(ogr_dataset)

        if is_vsi:
            delete_vsimem_file(path)
|
|
2647
|
-
|
|
2648
|
-
|
|
2649
|
-
cdef get_arrow_extension_metadata(const ArrowSchema* schema):
    """
    Parse the metadata of the ArrowSchema and extract extension type
    metadata (extension name and metadata).

    For the exact layout of the bytes, see
    https://arrow.apache.org/docs/dev/format/CDataInterface.html#c.ArrowSchema.metadata

    Returns
    -------
    tuple
        ``(extension_name, extension_metadata)``: the raw bytes stored under
        the ``ARROW:extension:name`` and ``ARROW:extension:metadata`` keys.
        Either element is None when its key is absent; both are None when the
        schema carries no metadata.
    """
    cdef const char *metadata = schema.metadata

    extension_name = None
    extension_metadata = None

    if metadata == NULL:
        return extension_name, extension_metadata

    # the number of metadata key/value pairs is stored
    # as an int32 value in the first 4 bytes; decode it as signed, exactly
    # like the length fields below, so a negative (corrupt) count results in
    # an empty loop instead of being read as a huge unsigned count
    n = int.from_bytes(metadata[:4], byteorder=sys.byteorder, signed=True)
    pos = 4

    for _ in range(n):
        # for each metadata key/value pair, the first 4 bytes is the byte length
        # of the key as an int32, then follows the key (not null-terminated),
        # and then the same for the value length and bytes
        key_length = int.from_bytes(
            metadata[pos:pos+4], byteorder=sys.byteorder, signed=True
        )
        pos += 4
        key = metadata[pos:pos+key_length]
        pos += key_length
        value_length = int.from_bytes(
            metadata[pos:pos+4], byteorder=sys.byteorder, signed=True
        )
        pos += 4
        value = metadata[pos:pos+value_length]
        pos += value_length

        if key == b"ARROW:extension:name":
            extension_name = value
        elif key == b"ARROW:extension:metadata":
            extension_metadata = value

        # stop scanning as soon as both entries of interest were found
        if extension_name is not None and extension_metadata is not None:
            break

    return extension_name, extension_metadata
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
cdef is_arrow_geometry_field(const ArrowSchema* schema):
    """Tell whether an Arrow field is a WKB geometry column.

    Returns True when the field's extension name is ``geoarrow.wkb`` or
    ``ogc.wkb``, and False when it has no extension name or any other
    non-geoarrow one.

    Raises NotImplementedError for any other ``geoarrow.*`` extension type.
    """
    name, _ = get_arrow_extension_metadata(schema)

    # no extension name: nothing marks this field as geometry
    if name is None:
        return False

    if name in (b"geoarrow.wkb", b"ogc.wkb"):
        return True

    # raise an error for other geoarrow types
    if name.startswith(b"geoarrow."):
        raise NotImplementedError(
            f"Writing a geometry column of type {name.decode()} is not yet "
            "supported. Only WKB is currently supported ('geoarrow.wkb' or "
            "'ogc.wkb' types)."
        )

    return False
|
|
2713
|
-
|
|
2714
|
-
|
|
2715
|
-
cdef create_fields_from_arrow_schema(
|
|
2716
|
-
OGRLayerH destLayer, const ArrowSchema* schema, char** options, str geometry_name
|
|
2717
|
-
):
|
|
2718
|
-
"""Create output fields using CreateFieldFromArrowSchema()"""
|
|
2719
|
-
|
|
2720
|
-
IF CTE_GDAL_VERSION < (3, 8, 0):
|
|
2721
|
-
raise RuntimeError("Need GDAL>=3.8 for Arrow write support")
|
|
2722
|
-
|
|
2723
|
-
# The schema object is a struct type where each child is a column.
|
|
2724
|
-
cdef ArrowSchema* child
|
|
2725
|
-
for i in range(schema.n_children):
|
|
2726
|
-
child = schema.children[i]
|
|
2727
|
-
|
|
2728
|
-
if child == NULL:
|
|
2729
|
-
raise RuntimeError("Received invalid Arrow schema (null child)")
|
|
2730
|
-
|
|
2731
|
-
# Don't create property for geometry column
|
|
2732
|
-
if get_string(child.name) == geometry_name or is_arrow_geometry_field(child):
|
|
2733
|
-
continue
|
|
2734
|
-
|
|
2735
|
-
if not OGR_L_CreateFieldFromArrowSchema(destLayer, child, options):
|
|
2736
|
-
exc = exc_check()
|
|
2737
|
-
gdal_msg = f" ({str(exc)})" if exc else ""
|
|
2738
|
-
raise FieldError(
|
|
2739
|
-
f"Error while creating field from Arrow for field {i} with name "
|
|
2740
|
-
f"'{get_string(child.name)}' and type {get_string(child.format)}"
|
|
2741
|
-
f"{gdal_msg}."
|
|
2742
|
-
)
|