pyogrio 0.9.0__cp312-cp312-win_amd64.whl → 0.11.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyogrio might be problematic. Click here for more details.

Files changed (125) hide show
  1. pyogrio/__init__.py +35 -28
  2. pyogrio/_compat.py +15 -1
  3. pyogrio/_env.py +4 -6
  4. pyogrio/_err.c +8941 -3106
  5. pyogrio/_err.cp312-win_amd64.pyd +0 -0
  6. pyogrio/_geometry.c +1008 -807
  7. pyogrio/_geometry.cp312-win_amd64.pyd +0 -0
  8. pyogrio/_io.c +10678 -9440
  9. pyogrio/_io.cp312-win_amd64.pyd +0 -0
  10. pyogrio/_ogr.c +1950 -1873
  11. pyogrio/_ogr.cp312-win_amd64.pyd +0 -0
  12. pyogrio/_version.py +3 -3
  13. pyogrio/_vsi.c +7558 -2509
  14. pyogrio/_vsi.cp312-win_amd64.pyd +0 -0
  15. pyogrio/core.py +86 -20
  16. pyogrio/errors.py +9 -16
  17. pyogrio/gdal_data/GDAL-targets.cmake +2 -2
  18. pyogrio/gdal_data/GDALConfig.cmake +0 -1
  19. pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
  20. pyogrio/gdal_data/MM_m_idofic.csv +321 -0
  21. pyogrio/gdal_data/gdalinfo_output.schema.json +3 -3
  22. pyogrio/gdal_data/gdaltileindex.xsd +253 -0
  23. pyogrio/gdal_data/gdalvrt.xsd +178 -63
  24. pyogrio/gdal_data/nitf_spec.xml +1 -17
  25. pyogrio/gdal_data/nitf_spec.xsd +1 -17
  26. pyogrio/gdal_data/ogrinfo_output.schema.json +23 -0
  27. pyogrio/gdal_data/ogrvrt.xsd +4 -17
  28. pyogrio/gdal_data/osmconf.ini +3 -1
  29. pyogrio/gdal_data/pci_datum.txt +222 -155
  30. pyogrio/gdal_data/pci_ellips.txt +90 -38
  31. pyogrio/gdal_data/pdfcomposition.xsd +1 -17
  32. pyogrio/gdal_data/vcpkg.spdx.json +29 -24
  33. pyogrio/gdal_data/vcpkg_abi_info.txt +32 -31
  34. pyogrio/gdal_data/vdv452.xml +1 -17
  35. pyogrio/gdal_data/vdv452.xsd +1 -17
  36. pyogrio/geopandas.py +122 -66
  37. pyogrio/proj_data/ITRF2014 +1 -1
  38. pyogrio/proj_data/ITRF2020 +91 -0
  39. pyogrio/proj_data/proj-config-version.cmake +2 -2
  40. pyogrio/proj_data/proj-config.cmake +1 -1
  41. pyogrio/proj_data/proj-targets-release.cmake +3 -2
  42. pyogrio/proj_data/proj-targets.cmake +2 -2
  43. pyogrio/proj_data/proj.db +0 -0
  44. pyogrio/proj_data/proj.ini +11 -3
  45. pyogrio/proj_data/proj4-targets-release.cmake +3 -2
  46. pyogrio/proj_data/proj4-targets.cmake +2 -2
  47. pyogrio/proj_data/projjson.schema.json +1 -1
  48. pyogrio/proj_data/usage +7 -2
  49. pyogrio/proj_data/vcpkg.spdx.json +26 -21
  50. pyogrio/proj_data/vcpkg_abi_info.txt +19 -18
  51. pyogrio/raw.py +46 -30
  52. pyogrio/tests/conftest.py +214 -12
  53. pyogrio/tests/fixtures/README.md +32 -13
  54. pyogrio/tests/fixtures/curve.gpkg +0 -0
  55. pyogrio/tests/fixtures/{test_multisurface.gpkg → curvepolygon.gpkg} +0 -0
  56. pyogrio/tests/fixtures/line_zm.gpkg +0 -0
  57. pyogrio/tests/fixtures/multisurface.gpkg +0 -0
  58. pyogrio/tests/test_arrow.py +181 -24
  59. pyogrio/tests/test_core.py +170 -76
  60. pyogrio/tests/test_geopandas_io.py +483 -135
  61. pyogrio/tests/test_path.py +39 -17
  62. pyogrio/tests/test_raw_io.py +170 -55
  63. pyogrio/tests/test_util.py +56 -0
  64. pyogrio/util.py +69 -32
  65. pyogrio-0.11.0.dist-info/DELVEWHEEL +2 -0
  66. pyogrio-0.11.0.dist-info/METADATA +124 -0
  67. {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info}/RECORD +90 -102
  68. {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info}/WHEEL +1 -1
  69. {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info/licenses}/LICENSE +1 -1
  70. pyogrio.libs/{Lerc-5e4d8cbeeabca06f95e2270792304dc3.dll → Lerc-0a4c85fb364eca6153da109568898e6c.dll} +0 -0
  71. pyogrio.libs/{gdal-b434963605a006e01c486c0df6dea4e0.dll → gdal-34e3e080255b205ef706390e097fa4dc.dll} +0 -0
  72. pyogrio.libs/geos-d8f20037634583c2efcd6ea1f4153169.dll +0 -0
  73. pyogrio.libs/geos_c-046e8885887192fced9516bda554471d.dll +0 -0
  74. pyogrio.libs/geotiff-1f2f76a5d30685a8e0497d9dbf8a79cf.dll +0 -0
  75. pyogrio.libs/{iconv-2-8fcc23ddc6f096c45871011b6e008b44.dll → iconv-2-4b71ebbdf6834234e0c64cb2439b77cf.dll} +0 -0
  76. pyogrio.libs/{jpeg62-2f9b7af22d78338e8f0be0058503dc35.dll → jpeg62-bf2a75b1f2695748cd86238ddb19c65b.dll} +0 -0
  77. pyogrio.libs/json-c-4bc6781090e73c9b22d8dc057618a277.dll +0 -0
  78. pyogrio.libs/libcrypto-3-x64-c8f1692ed45cf55faecce2c448056b2e.dll +0 -0
  79. pyogrio.libs/libcurl-f3604410cd467a44d927794ebdce81b8.dll +0 -0
  80. pyogrio.libs/libexpat-385074fd54deb4b640baafa42cbb3146.dll +0 -0
  81. pyogrio.libs/liblzma-8968f0bfd463b7fe612b20d07adc3c1e.dll +0 -0
  82. pyogrio.libs/libpng16-44105208fe941d03e9f0c17bdbb952f7.dll +0 -0
  83. pyogrio.libs/libssl-3-x64-58c364315f431ab1f0e48b311b8cf105.dll +0 -0
  84. pyogrio.libs/msvcp140-99aa35e2033bb8d388bc35c8a68b77e3.dll +0 -0
  85. pyogrio.libs/proj_9-ee59474f99643c112eb02aa34a910237.dll +0 -0
  86. pyogrio.libs/{qhull_r-c45abde5d0c92faf723cc2942138af77.dll → qhull_r-eaac2f11a3d8241f082e54447c7504d7.dll} +0 -0
  87. pyogrio.libs/sqlite3-dc748e3452944fd41001abacdd783569.dll +0 -0
  88. pyogrio.libs/tiff-c409ddbe87b39639b83fee50d4aea318.dll +0 -0
  89. pyogrio.libs/{zlib1-e1272810861a13dd8d6cff3beac47f17.dll → zlib1-094085b7b78666197dcc8e1fce2d835d.dll} +0 -0
  90. pyogrio/_err.pxd +0 -4
  91. pyogrio/_err.pyx +0 -250
  92. pyogrio/_geometry.pxd +0 -4
  93. pyogrio/_geometry.pyx +0 -129
  94. pyogrio/_io.pxd +0 -0
  95. pyogrio/_io.pyx +0 -2742
  96. pyogrio/_ogr.pxd +0 -444
  97. pyogrio/_ogr.pyx +0 -346
  98. pyogrio/_vsi.pxd +0 -4
  99. pyogrio/_vsi.pyx +0 -140
  100. pyogrio/arrow_bridge.h +0 -115
  101. pyogrio/gdal_data/bag_template.xml +0 -201
  102. pyogrio/gdal_data/gmlasconf.xml +0 -169
  103. pyogrio/gdal_data/gmlasconf.xsd +0 -1066
  104. pyogrio/gdal_data/netcdf_config.xsd +0 -143
  105. pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
  106. pyogrio/tests/fixtures/test_datetime.geojson +0 -7
  107. pyogrio/tests/fixtures/test_datetime_tz.geojson +0 -8
  108. pyogrio/tests/fixtures/test_fgdb.gdb.zip +0 -0
  109. pyogrio/tests/fixtures/test_nested.geojson +0 -18
  110. pyogrio/tests/fixtures/test_ogr_types_list.geojson +0 -12
  111. pyogrio-0.9.0.dist-info/DELVEWHEEL +0 -2
  112. pyogrio-0.9.0.dist-info/METADATA +0 -100
  113. pyogrio.libs/geos-f0622d0794b81c937a851b2e6fa9b712.dll +0 -0
  114. pyogrio.libs/geos_c-0e16bf70612fc3301d077b9d863a3fdb.dll +0 -0
  115. pyogrio.libs/geotiff-772e7c705fb15ddf91b432adb4eb1f6c.dll +0 -0
  116. pyogrio.libs/json-c-e52a077545e4057de42beb4948289b41.dll +0 -0
  117. pyogrio.libs/libcurl-bc81cd8afe15b10c0821b181b6af8bd0.dll +0 -0
  118. pyogrio.libs/libexpat-fbe03ca8917dfda776562d4338b289b8.dll +0 -0
  119. pyogrio.libs/liblzma-6b36f24d54d3dd45f274a2aebef81085.dll +0 -0
  120. pyogrio.libs/libpng16-13928571ad910705eae8d7dd8eef8b11.dll +0 -0
  121. pyogrio.libs/msvcp140-46db46e967c8db2cb7a20fc75872a57e.dll +0 -0
  122. pyogrio.libs/proj-8a30239ef2dfc3b9dd2bb48e8abb330f.dll +0 -0
  123. pyogrio.libs/sqlite3-df30c3cf230727e23c43c40126a530f7.dll +0 -0
  124. pyogrio.libs/tiff-43630f30487a9015213475ae86ed3fa3.dll +0 -0
  125. {pyogrio-0.9.0.dist-info → pyogrio-0.11.0.dist-info}/top_level.txt +0 -0
pyogrio/_io.pyx DELETED
@@ -1,2742 +0,0 @@
1
- #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
2
-
3
- """IO support for OGR vector data sources
4
- """
5
-
6
-
7
- import contextlib
8
- import datetime
9
- import locale
10
- import logging
11
- import math
12
- import os
13
- import sys
14
- import warnings
15
-
16
- from libc.stdint cimport uint8_t, uintptr_t
17
- from libc.stdlib cimport malloc, free
18
- from libc.string cimport strlen
19
- from libc.math cimport isnan
20
- from cpython.pycapsule cimport PyCapsule_GetPointer
21
-
22
- cimport cython
23
- from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
24
-
25
- import numpy as np
26
-
27
- from pyogrio._ogr cimport *
28
- from pyogrio._err cimport *
29
- from pyogrio._vsi cimport *
30
- from pyogrio._err import CPLE_BaseError, CPLE_NotSupportedError, NullPointerError
31
- from pyogrio._geometry cimport get_geometry_type, get_geometry_type_code
32
- from pyogrio.errors import CRSError, DataSourceError, DataLayerError, GeometryError, FieldError, FeatureError
33
-
34
- log = logging.getLogger(__name__)
35
-
36
-
37
- # Mapping of OGR integer field types to Python field type names
38
- # (index in array is the integer field type)
39
- FIELD_TYPES = [
40
- 'int32', # OFTInteger, Simple 32bit integer
41
- None, # OFTIntegerList, List of 32bit integers, not supported
42
- 'float64', # OFTReal, Double Precision floating point
43
- None, # OFTRealList, List of doubles, not supported
44
- 'object', # OFTString, String of UTF-8 chars
45
- None, # OFTStringList, Array of strings, not supported
46
- None, # OFTWideString, deprecated, not supported
47
- None, # OFTWideStringList, deprecated, not supported
48
- 'object', # OFTBinary, Raw Binary data
49
- 'datetime64[D]', # OFTDate, Date
50
- None, # OFTTime, Time, NOTE: not directly supported in numpy
51
- 'datetime64[ms]',# OFTDateTime, Date and Time
52
- 'int64', # OFTInteger64, Single 64bit integer
53
- None # OFTInteger64List, List of 64bit integers, not supported
54
- ]
55
-
56
- FIELD_SUBTYPES = {
57
- OFSTNone: None, # No subtype
58
- OFSTBoolean: "bool", # Boolean integer
59
- OFSTInt16: "int16", # Signed 16-bit integer
60
- OFSTFloat32: "float32", # Single precision (32 bit) floating point
61
- }
62
-
63
- # Mapping of numpy ndarray dtypes to (field type, subtype)
64
- DTYPE_OGR_FIELD_TYPES = {
65
- 'int8': (OFTInteger, OFSTInt16),
66
- 'int16': (OFTInteger, OFSTInt16),
67
- 'int32': (OFTInteger, OFSTNone),
68
- 'int': (OFTInteger64, OFSTNone),
69
- 'int64': (OFTInteger64, OFSTNone),
70
- # unsigned ints have to be converted to ints; these are converted
71
- # to the next largest integer size
72
- 'uint8': (OFTInteger, OFSTInt16),
73
- 'uint16': (OFTInteger, OFSTNone),
74
- 'uint32': (OFTInteger64, OFSTNone),
75
- # TODO: these might get truncated, check maximum value and raise error
76
- 'uint': (OFTInteger64, OFSTNone),
77
- 'uint64': (OFTInteger64, OFSTNone),
78
-
79
- # bool is handled as integer with boolean subtype
80
- 'bool': (OFTInteger, OFSTBoolean),
81
-
82
- 'float32': (OFTReal,OFSTFloat32),
83
- 'float': (OFTReal, OFSTNone),
84
- 'float64': (OFTReal, OFSTNone),
85
-
86
- 'datetime64[D]': (OFTDate, OFSTNone),
87
- 'datetime64': (OFTDateTime, OFSTNone),
88
- }
89
-
90
-
91
- cdef int start_transaction(OGRDataSourceH ogr_dataset, int force) except 1:
92
- cdef int err = GDALDatasetStartTransaction(ogr_dataset, force)
93
- if err == OGRERR_FAILURE:
94
- raise DataSourceError("Failed to start transaction")
95
-
96
- return 0
97
-
98
-
99
- cdef int commit_transaction(OGRDataSourceH ogr_dataset) except 1:
100
- cdef int err = GDALDatasetCommitTransaction(ogr_dataset)
101
- if err == OGRERR_FAILURE:
102
- raise DataSourceError("Failed to commit transaction")
103
-
104
- return 0
105
-
106
-
107
- # Not currently used; uncomment when used
108
- # cdef int rollback_transaction(OGRDataSourceH ogr_dataset) except 1:
109
- # cdef int err = GDALDatasetRollbackTransaction(ogr_dataset)
110
- # if err == OGRERR_FAILURE:
111
- # raise DataSourceError("Failed to rollback transaction")
112
-
113
- # return 0
114
-
115
-
116
- cdef char** dict_to_options(object values):
117
- """Convert a python dictionary into name / value pairs (stored in a char**)
118
-
119
- Parameters
120
- ----------
121
- values: dict
122
- all keys and values must be strings
123
-
124
- Returns
125
- -------
126
- char**
127
- """
128
- cdef char **options = NULL
129
-
130
- if values is None:
131
- return NULL
132
-
133
- for k, v in values.items():
134
- k = k.encode('UTF-8')
135
- v = v.encode('UTF-8')
136
- options = CSLAddNameValue(options, <const char *>k, <const char *>v)
137
-
138
- return options
139
-
140
-
141
- cdef const char* override_threadlocal_config_option(str key, str value):
142
- """Set the CPLSetThreadLocalConfigOption for key=value
143
-
144
- Parameters
145
- ----------
146
- key : str
147
- value : str
148
-
149
- Returns
150
- -------
151
- const char*
152
- value previously set for key, so that it can be later restored. Caller
153
- is responsible for freeing this via CPLFree() if not NULL.
154
- """
155
-
156
- key_b = key.encode("UTF-8")
157
- cdef const char* key_c = key_b
158
-
159
- value_b = value.encode("UTF-8")
160
- cdef const char* value_c = value_b
161
-
162
-
163
- cdef const char *prev_value = CPLGetThreadLocalConfigOption(key_c, NULL)
164
- if prev_value != NULL:
165
- # strings returned from config options may be replaced via
166
- # CPLSetConfigOption() below; GDAL instructs us to save a copy
167
- # in a new string
168
- prev_value = CPLStrdup(prev_value)
169
-
170
- CPLSetThreadLocalConfigOption(key_c, value_c)
171
-
172
- return prev_value
173
-
174
-
175
- cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL:
176
- """Open an existing OGR data source
177
-
178
- Parameters
179
- ----------
180
- path_c : char *
181
- input path, including an in-memory path (/vsimem/...)
182
- mode : int
183
- set to 1 to allow updating data source
184
- options : char **, optional
185
- dataset open options
186
- """
187
- cdef void* ogr_dataset = NULL
188
-
189
- # Force linear approximations in all cases
190
- OGRSetNonLinearGeometriesEnabledFlag(0)
191
-
192
- flags = GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR
193
- if mode == 1:
194
- flags |= GDAL_OF_UPDATE
195
- else:
196
- flags |= GDAL_OF_READONLY
197
-
198
-
199
- try:
200
- # WARNING: GDAL logs warnings about invalid open options to stderr
201
- # instead of raising an error
202
- ogr_dataset = exc_wrap_pointer(
203
- GDALOpenEx(path_c, flags, NULL, <const char *const *>options, NULL)
204
- )
205
-
206
- return ogr_dataset
207
-
208
- except NullPointerError:
209
- raise DataSourceError(
210
- "Failed to open dataset (mode={}): {}".format(mode, path_c.decode("utf-8"))
211
- ) from None
212
-
213
- except CPLE_BaseError as exc:
214
- if str(exc).endswith("a supported file format."):
215
- raise DataSourceError(
216
- f"{str(exc)} It might help to specify the correct driver explicitly by "
217
- "prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'."
218
- ) from None
219
- raise DataSourceError(str(exc)) from None
220
-
221
-
222
- cdef ogr_close(GDALDatasetH ogr_dataset):
223
- """Close the dataset and raise exception if that fails.
224
- NOTE: some drivers only raise errors on write when calling GDALClose()
225
- """
226
- if ogr_dataset != NULL:
227
- IF CTE_GDAL_VERSION >= (3, 7, 0):
228
- if GDALClose(ogr_dataset) != CE_None:
229
- return exc_check()
230
-
231
- return
232
-
233
- ELSE:
234
- GDALClose(ogr_dataset)
235
-
236
- # GDAL will set an error if there was an error writing the data source
237
- # on close
238
- return exc_check()
239
-
240
-
241
- cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL:
242
- """Open OGR layer by index or name.
243
-
244
- Parameters
245
- ----------
246
- ogr_dataset : pointer to open OGR dataset
247
- layer : str or int
248
- name or index of layer
249
-
250
- Returns
251
- -------
252
- pointer to OGR layer
253
- """
254
- cdef OGRLayerH ogr_layer = NULL
255
-
256
- try:
257
- if isinstance(layer, str):
258
- name_b = layer.encode('utf-8')
259
- name_c = name_b
260
- ogr_layer = exc_wrap_pointer(GDALDatasetGetLayerByName(ogr_dataset, name_c))
261
-
262
- elif isinstance(layer, int):
263
- ogr_layer = exc_wrap_pointer(GDALDatasetGetLayer(ogr_dataset, layer))
264
-
265
- # GDAL does not always raise exception messages in this case
266
- except NullPointerError:
267
- raise DataLayerError(f"Layer '{layer}' could not be opened") from None
268
-
269
- except CPLE_BaseError as exc:
270
- raise DataLayerError(str(exc))
271
-
272
- # if the driver is OSM, we need to execute SQL to set the layer to read in
273
- # order to read it properly
274
- if get_driver(ogr_dataset) == "OSM":
275
- # Note: this returns NULL and does not need to be freed via
276
- # GDALDatasetReleaseResultSet()
277
- layer_name = get_string(OGR_L_GetName(ogr_layer))
278
- sql_b = f"SET interest_layers = {layer_name}".encode('utf-8')
279
- sql_c = sql_b
280
-
281
- GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL)
282
-
283
- return ogr_layer
284
-
285
-
286
- cdef OGRLayerH execute_sql(GDALDatasetH ogr_dataset, str sql, str sql_dialect=None) except NULL:
287
- """Execute an SQL statement on a dataset.
288
-
289
- Parameters
290
- ----------
291
- ogr_dataset : pointer to open OGR dataset
292
- sql : str
293
- The sql statement to execute
294
- sql_dialect : str, optional (default: None)
295
- The sql dialect the sql statement is written in
296
-
297
- Returns
298
- -------
299
- pointer to OGR layer
300
- """
301
-
302
- try:
303
- sql_b = sql.encode('utf-8')
304
- sql_c = sql_b
305
- if sql_dialect is None:
306
- return exc_wrap_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL))
307
-
308
- sql_dialect_b = sql_dialect.encode('utf-8')
309
- sql_dialect_c = sql_dialect_b
310
- return exc_wrap_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, sql_dialect_c))
311
-
312
- # GDAL does not always raise exception messages in this case
313
- except NullPointerError:
314
- raise DataLayerError(f"Error executing sql '{sql}'") from None
315
-
316
- except CPLE_BaseError as exc:
317
- raise DataLayerError(str(exc))
318
-
319
-
320
- cdef str get_crs(OGRLayerH ogr_layer):
321
- """Read CRS from layer as EPSG:<code> if available or WKT.
322
-
323
- Parameters
324
- ----------
325
- ogr_layer : pointer to open OGR layer
326
-
327
- Returns
328
- -------
329
- str or None
330
- EPSG:<code> or WKT
331
- """
332
- cdef void *ogr_crs = NULL
333
- cdef const char *authority_key = NULL
334
- cdef const char *authority_val = NULL
335
- cdef char *ogr_wkt = NULL
336
-
337
- try:
338
- ogr_crs = exc_wrap_pointer(OGR_L_GetSpatialRef(ogr_layer))
339
-
340
- except NullPointerError:
341
- # No coordinate system defined.
342
- # This is expected and valid for nonspatial tables.
343
- return None
344
-
345
- except CPLE_BaseError as exc:
346
- raise CRSError(str(exc))
347
-
348
- # If CRS can be decoded to an EPSG code, use that.
349
- # The following pointers will be NULL if it cannot be decoded.
350
- retval = OSRAutoIdentifyEPSG(ogr_crs)
351
- authority_key = <const char *>OSRGetAuthorityName(ogr_crs, NULL)
352
- authority_val = <const char *>OSRGetAuthorityCode(ogr_crs, NULL)
353
-
354
- if authority_key != NULL and authority_val != NULL:
355
- key = get_string(authority_key)
356
- if key == 'EPSG':
357
- value = get_string(authority_val)
358
- return f"EPSG:{value}"
359
-
360
- try:
361
- OSRExportToWkt(ogr_crs, &ogr_wkt)
362
- if ogr_wkt == NULL:
363
- raise CRSError("CRS could not be extracted as WKT") from None
364
-
365
- wkt = get_string(ogr_wkt)
366
-
367
- finally:
368
- CPLFree(ogr_wkt)
369
- return wkt
370
-
371
-
372
- cdef get_driver(OGRDataSourceH ogr_dataset):
373
- """Get the driver for a dataset.
374
-
375
- Parameters
376
- ----------
377
- ogr_dataset : pointer to open OGR dataset
378
- Returns
379
- -------
380
- str or None
381
- """
382
- cdef void *ogr_driver
383
-
384
- try:
385
- ogr_driver = exc_wrap_pointer(GDALGetDatasetDriver(ogr_dataset))
386
-
387
- except NullPointerError:
388
- raise DataLayerError(f"Could not detect driver of dataset") from None
389
-
390
- except CPLE_BaseError as exc:
391
- raise DataLayerError(str(exc))
392
-
393
- driver = OGR_Dr_GetName(ogr_driver).decode("UTF-8")
394
- return driver
395
-
396
-
397
- cdef get_feature_count(OGRLayerH ogr_layer, int force):
398
- """Get the feature count of a layer.
399
-
400
- If GDAL returns an unknown count (-1), this iterates over every feature
401
- to calculate the count.
402
-
403
- Parameters
404
- ----------
405
- ogr_layer : pointer to open OGR layer
406
- force : bool
407
- True if the feature count should be computed even if it is expensive
408
-
409
- Returns
410
- -------
411
- int
412
- count of features
413
- """
414
-
415
- cdef OGRFeatureH ogr_feature = NULL
416
- cdef int feature_count = OGR_L_GetFeatureCount(ogr_layer, force)
417
-
418
- # if GDAL refuses to give us the feature count, we have to loop over all
419
- # features ourselves and get the count. This can happen for some drivers
420
- # (e.g., OSM) or if a where clause is invalid but not rejected as error
421
- if force and feature_count == -1:
422
- # make sure layer is read from beginning
423
- OGR_L_ResetReading(ogr_layer)
424
-
425
- feature_count = 0
426
- while True:
427
- try:
428
- ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))
429
- feature_count +=1
430
-
431
- except NullPointerError:
432
- # No more rows available, so stop reading
433
- break
434
-
435
- # driver may raise other errors, e.g., for OSM if node ids are not
436
- # increasing, the default config option OSM_USE_CUSTOM_INDEXING=YES
437
- # causes errors iterating over features
438
- except CPLE_BaseError as exc:
439
- # if an invalid where clause is used for a GPKG file, it is not
440
- # caught as an error until attempting to iterate over features;
441
- # catch it here
442
- if "failed to prepare SQL" in str(exc):
443
- raise ValueError(f"Invalid SQL query: {str(exc)}") from None
444
-
445
- raise DataLayerError(f"Could not iterate over features: {str(exc)}") from None
446
-
447
- finally:
448
- if ogr_feature != NULL:
449
- OGR_F_Destroy(ogr_feature)
450
- ogr_feature = NULL
451
-
452
- return feature_count
453
-
454
-
455
- cdef get_total_bounds(OGRLayerH ogr_layer, int force):
456
- """Get the total bounds of a layer.
457
-
458
- Parameters
459
- ----------
460
- ogr_layer : pointer to open OGR layer
461
- force : bool
462
- True if the total bounds should be computed even if it is expensive
463
-
464
- Returns
465
- -------
466
- tuple of (xmin, ymin, xmax, ymax) or None
467
- The total bounds of the layer, or None if they could not be determined.
468
- """
469
-
470
- cdef OGREnvelope ogr_envelope
471
- try:
472
- exc_wrap_ogrerr(OGR_L_GetExtent(ogr_layer, &ogr_envelope, force))
473
- bounds = (
474
- ogr_envelope.MinX, ogr_envelope.MinY, ogr_envelope.MaxX, ogr_envelope.MaxY
475
- )
476
-
477
- except CPLE_BaseError:
478
- bounds = None
479
-
480
- return bounds
481
-
482
-
483
- cdef set_metadata(GDALMajorObjectH obj, object metadata):
484
- """Set metadata on a dataset or layer
485
-
486
- Parameters
487
- ----------
488
- obj : pointer to dataset or layer
489
- metadata : dict, optional (default None)
490
- keys and values must be strings
491
- """
492
-
493
- cdef char **metadata_items = NULL
494
- cdef int err = 0
495
-
496
- metadata_items = dict_to_options(metadata)
497
- if metadata_items != NULL:
498
- # only default namepace is currently supported
499
- err = GDALSetMetadata(obj, metadata_items, NULL)
500
-
501
- CSLDestroy(metadata_items)
502
- metadata_items = NULL
503
-
504
- if err:
505
- raise RuntimeError("Could not set metadata") from None
506
-
507
- cdef get_metadata(GDALMajorObjectH obj):
508
- """Get metadata for a dataset or layer
509
-
510
- Parameters
511
- ----------
512
- obj : pointer to dataset or layer
513
-
514
- Returns
515
- -------
516
- dict or None
517
- metadata as key, value pairs
518
- """
519
- # only default namespace is currently supported
520
- cdef char **metadata = GDALGetMetadata(obj, NULL)
521
-
522
- if metadata != NULL:
523
- return dict(
524
- metadata[i].decode('UTF-8').split('=', 1)
525
- for i in range(CSLCount(metadata))
526
- )
527
-
528
- return None
529
-
530
-
531
- cdef detect_encoding(OGRDataSourceH ogr_dataset, OGRLayerH ogr_layer):
532
- """Attempt to detect the encoding to use to read/write string values.
533
-
534
- If the layer/dataset supports reading/writing data in UTF-8, returns UTF-8.
535
- If UTF-8 is not supported and ESRI Shapefile, returns ISO-8859-1
536
- Otherwise the system locale preferred encoding is returned.
537
-
538
- Parameters
539
- ----------
540
- ogr_dataset : pointer to open OGR dataset
541
- ogr_layer : pointer to open OGR layer
542
-
543
- Returns
544
- -------
545
- str or None
546
- """
547
-
548
- if OGR_L_TestCapability(ogr_layer, OLCStringsAsUTF8):
549
- # OGR_L_TestCapability returns True for OLCStringsAsUTF8 if GDAL hides encoding
550
- # complexities for this layer/driver type. In this case all string attribute
551
- # values have to be supplied in UTF-8 and values will be returned in UTF-8.
552
- # The encoding used to read/write under the hood depends on the driver used.
553
- # For layers/drivers where False is returned, the string values are written and
554
- # read without recoding. Hence, it is up to you to supply the data in the
555
- # appropriate encoding. More info:
556
- # https://gdal.org/development/rfc/rfc23_ogr_unicode.html#oftstring-oftstringlist-fields
557
- # NOTE: for shapefiles, this always returns False for the layer returned
558
- # when executing SQL, even when it supports UTF-8 (patched below);
559
- # this may be fixed by https://github.com/OSGeo/gdal/pull/9649 (GDAL >=3.9.0?)
560
- return "UTF-8"
561
-
562
- driver = get_driver(ogr_dataset)
563
- if driver == "ESRI Shapefile":
564
- # OGR_L_TestCapability returns True for OLCStringsAsUTF8 (above) for
565
- # shapefiles when a .cpg file is present with a valid encoding, or GDAL
566
- # auto-detects the encoding from the code page of the .dbf file, or
567
- # SHAPE_ENCODING config option is set, or ENCODING layer creation option
568
- # is specified (shapefiles only). Otherwise, we can only assume that
569
- # shapefiles are in their default encoding of ISO-8859-1 (which may be
570
- # incorrect and must be overridden by user-provided encoding)
571
-
572
- # Always use the first layer to test capabilities until detection for
573
- # SQL results from shapefiles are fixed (above)
574
- # This block should only be used for unfixed versions of GDAL (<3.9.0?)
575
- if OGR_L_TestCapability(GDALDatasetGetLayer(ogr_dataset, 0), OLCStringsAsUTF8):
576
- return "UTF-8"
577
-
578
- return "ISO-8859-1"
579
-
580
- if driver == "OSM":
581
- # always set OSM data to UTF-8
582
- # per https://help.openstreetmap.org/questions/2172/what-encoding-does-openstreetmap-use
583
- return "UTF-8"
584
-
585
- if driver in ("XLSX", "ODS"):
586
- # TestCapability for OLCStringsAsUTF8 for XLSX and ODS was False for new files
587
- # being created for GDAL < 3.8.5. Once these versions of GDAL are no longer
588
- # supported, this can be removed.
589
- return "UTF-8"
590
-
591
- if driver == "GeoJSONSeq":
592
- # In old gdal versions, OLCStringsAsUTF8 wasn't advertised yet.
593
- return "UTF-8"
594
-
595
- return locale.getpreferredencoding()
596
-
597
-
598
- cdef get_fields(OGRLayerH ogr_layer, str encoding, use_arrow=False):
599
- """Get field names and types for layer.
600
-
601
- Parameters
602
- ----------
603
- ogr_layer : pointer to open OGR layer
604
- encoding : str
605
- encoding to use when reading field name
606
- use_arrow : bool, default False
607
- If using arrow, all types are supported, and we don't have to
608
- raise warnings
609
-
610
- Returns
611
- -------
612
- ndarray(n, 4)
613
- array of index, ogr type, name, numpy type
614
- """
615
- cdef int i
616
- cdef int field_count
617
- cdef OGRFeatureDefnH ogr_featuredef = NULL
618
- cdef OGRFieldDefnH ogr_fielddef = NULL
619
- cdef int field_subtype
620
- cdef const char *key_c
621
-
622
- try:
623
- ogr_featuredef = exc_wrap_pointer(OGR_L_GetLayerDefn(ogr_layer))
624
-
625
- except NullPointerError:
626
- raise DataLayerError("Could not get layer definition") from None
627
-
628
- except CPLE_BaseError as exc:
629
- raise DataLayerError(str(exc))
630
-
631
- field_count = OGR_FD_GetFieldCount(ogr_featuredef)
632
-
633
- fields = np.empty(shape=(field_count, 4), dtype=object)
634
- fields_view = fields[:,:]
635
-
636
- skipped_fields = False
637
-
638
- for i in range(field_count):
639
- try:
640
- ogr_fielddef = exc_wrap_pointer(OGR_FD_GetFieldDefn(ogr_featuredef, i))
641
-
642
- except NullPointerError:
643
- raise FieldError(f"Could not get field definition for field at index {i}") from None
644
-
645
- except CPLE_BaseError as exc:
646
- raise FieldError(str(exc))
647
-
648
- field_name = get_string(OGR_Fld_GetNameRef(ogr_fielddef), encoding=encoding)
649
-
650
- field_type = OGR_Fld_GetType(ogr_fielddef)
651
- np_type = FIELD_TYPES[field_type]
652
- if not np_type and not use_arrow:
653
- skipped_fields = True
654
- log.warning(
655
- f"Skipping field {field_name}: unsupported OGR type: {field_type}")
656
- continue
657
-
658
- field_subtype = OGR_Fld_GetSubType(ogr_fielddef)
659
- subtype = FIELD_SUBTYPES.get(field_subtype)
660
- if subtype is not None:
661
- # bool, int16, float32 dtypes
662
- np_type = subtype
663
-
664
- fields_view[i,0] = i
665
- fields_view[i,1] = field_type
666
- fields_view[i,2] = field_name
667
- fields_view[i,3] = np_type
668
-
669
- if skipped_fields:
670
- # filter out skipped fields
671
- mask = np.array([idx is not None for idx in fields[:, 0]])
672
- fields = fields[mask]
673
-
674
- return fields
675
-
676
-
677
- cdef apply_where_filter(OGRLayerH ogr_layer, str where):
678
- """Applies where filter to layer.
679
-
680
- WARNING: GDAL does not raise an error for GPKG when SQL query is invalid
681
- but instead only logs to stderr.
682
-
683
- Parameters
684
- ----------
685
- ogr_layer : pointer to open OGR layer
686
- where : str
687
- See http://ogdi.sourceforge.net/prop/6.2.CapabilitiesMetadata.html
688
- restricted_where for more information about valid expressions.
689
-
690
- Raises
691
- ------
692
- ValueError: if SQL query is not valid
693
- """
694
-
695
- where_b = where.encode('utf-8')
696
- where_c = where_b
697
- err = OGR_L_SetAttributeFilter(ogr_layer, where_c)
698
- # WARNING: GDAL does not raise this error for GPKG but instead only
699
- # logs to stderr
700
- if err != OGRERR_NONE:
701
- try:
702
- exc_check()
703
- except CPLE_BaseError as exc:
704
- raise ValueError(str(exc))
705
-
706
- raise ValueError(f"Invalid SQL query for layer '{OGR_L_GetName(ogr_layer)}': '{where}'")
707
-
708
-
709
- cdef apply_bbox_filter(OGRLayerH ogr_layer, bbox):
710
- """Applies bounding box spatial filter to layer.
711
-
712
- Parameters
713
- ----------
714
- ogr_layer : pointer to open OGR layer
715
- bbox : list or tuple of xmin, ymin, xmax, ymax
716
-
717
- Raises
718
- ------
719
- ValueError: if bbox is not a list or tuple or does not have proper number of
720
- items
721
- """
722
-
723
- if not (isinstance(bbox, (tuple, list)) and len(bbox) == 4):
724
- raise ValueError(f"Invalid bbox: {bbox}")
725
-
726
- xmin, ymin, xmax, ymax = bbox
727
- OGR_L_SetSpatialFilterRect(ogr_layer, xmin, ymin, xmax, ymax)
728
-
729
-
730
- cdef apply_geometry_filter(OGRLayerH ogr_layer, wkb):
731
- """Applies geometry spatial filter to layer.
732
-
733
- Parameters
734
- ----------
735
- ogr_layer : pointer to open OGR layer
736
- wkb : WKB encoding of geometry
737
- """
738
-
739
- cdef OGRGeometryH ogr_geometry = NULL
740
- cdef unsigned char *wkb_buffer = wkb
741
-
742
- err = OGR_G_CreateFromWkb(wkb_buffer, NULL, &ogr_geometry, len(wkb))
743
- if err:
744
- if ogr_geometry != NULL:
745
- OGR_G_DestroyGeometry(ogr_geometry)
746
- raise GeometryError("Could not create mask geometry") from None
747
-
748
- OGR_L_SetSpatialFilter(ogr_layer, ogr_geometry)
749
- OGR_G_DestroyGeometry(ogr_geometry)
750
-
751
-
752
cdef validate_feature_range(OGRLayerH ogr_layer, int skip_features=0, int max_features=0):
    """Limit skip_features and max_features to bounds available for dataset.

    This is typically performed after applying where and spatial filters, which
    reduce the available range of features.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    skip_features : number of features to skip from beginning of available range
    max_features : maximum number of features to read from available range;
        0 means "all remaining features"

    Returns
    -------
    tuple of (skip_features, num_features)
        skip_features is capped at the feature count; num_features never
        exceeds the number of features left after skipping.
    """
    feature_count = get_feature_count(ogr_layer, 1)

    if feature_count == 0:
        return 0, 0

    if skip_features >= feature_count:
        # Everything is skipped: there is nothing left to read, so do not let
        # callers allocate max_features rows that can never be filled.
        return feature_count, 0

    # Clamp against what is left after skipping, not against the total count,
    # so output arrays are not over-allocated and then truncated.
    remaining = feature_count - skip_features
    if max_features == 0 or max_features > remaining:
        num_features = remaining
    else:
        num_features = max_features

    return skip_features, num_features
781
-
782
-
783
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)   # Deactivate negative indexing.
cdef process_geometry(OGRFeatureH ogr_feature, int i, geom_view, uint8_t force_2d):
    """Store the WKB of a feature's geometry at position i of geom_view.

    Parameters
    ----------
    ogr_feature : pointer to an OGR feature
    i : index into geom_view to write to
    geom_view : indexable output container; receives bytes, or None when the
        feature has no geometry
    force_2d : if nonzero, 3D geometries are reduced to 2D before export

    Raises
    ------
    GeometryError: if a non-linear geometry could not be converted to linear
    MemoryError: if the WKB buffer could not be allocated
    """
    cdef OGRGeometryH ogr_geometry = NULL
    # geometry created by OGR_G_GetLinearGeometry; owned by us, unlike the
    # reference returned by OGR_F_GetGeometryRef
    cdef OGRGeometryH linear_geometry = NULL
    cdef OGRwkbGeometryType ogr_geometry_type

    cdef unsigned char *wkb = NULL
    cdef int ret_length

    ogr_geometry = OGR_F_GetGeometryRef(ogr_feature)

    if ogr_geometry == NULL:
        geom_view[i] = None
        return

    try:
        ogr_geometry_type = OGR_G_GetGeometryType(ogr_geometry)

        # if geometry has M values, these need to be removed first
        if OGR_G_IsMeasured(ogr_geometry):
            OGR_G_SetMeasured(ogr_geometry, 0)

        if force_2d and OGR_G_Is3D(ogr_geometry):
            OGR_G_Set3D(ogr_geometry, 0)

        # if non-linear (e.g., curve), force to linear type
        if OGR_GT_IsNonLinear(ogr_geometry_type):
            linear_geometry = OGR_G_GetLinearGeometry(ogr_geometry, 0, NULL)
            if linear_geometry == NULL:
                raise GeometryError("Could not convert geometry to linear type")
            ogr_geometry = linear_geometry

        ret_length = OGR_G_WkbSize(ogr_geometry)
        wkb = <unsigned char*>malloc(sizeof(unsigned char)*ret_length)
        if wkb == NULL:
            raise MemoryError("Could not allocate buffer for WKB geometry")

        OGR_G_ExportToWkb(ogr_geometry, 1, wkb)
        # slicing copies the buffer into a Python bytes object
        geom_view[i] = wkb[:ret_length]

    finally:
        free(wkb)
        # the linearized copy is not owned by the feature, so destroy it here
        if linear_geometry != NULL:
            OGR_G_DestroyGeometry(linear_geometry)
819
-
820
-
821
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)   # Deactivate negative indexing.
cdef process_fields(
    OGRFeatureH ogr_feature,
    int i,
    int n_fields,
    object field_data,
    object field_data_view,
    object field_indexes,
    object field_ogr_types,
    encoding,
    bint datetime_as_string
):
    """Copy the attribute values of one feature into row i of each field array.

    Parameters
    ----------
    ogr_feature : pointer to an OGR feature
    i : row index to write to
    n_fields : number of fields to process
    field_data : list of output arrays, one per field; an entry is replaced by
        a float64 copy when a null has to be stored in a bool / integer array
    field_data_view : list of views over field_data that are written to
    field_indexes : OGR field index for each processed field
    field_ogr_types : OGR field type for each processed field
    encoding : encoding passed to get_string for string values
    datetime_as_string : if True, date / datetime values are stored as the
        string returned by OGR instead of being parsed
    """
    cdef int j
    cdef int success
    cdef int field_index
    cdef int ret_length
    cdef GByte *bin_value
    cdef int year = 0
    cdef int month = 0
    cdef int day = 0
    cdef int hour = 0
    cdef int minute = 0
    cdef float fsecond = 0.0
    cdef int timezone = 0

    for j in range(n_fields):
        field_index = field_indexes[j]
        field_type = field_ogr_types[j]
        data = field_data_view[j]

        if OGR_F_IsFieldSetAndNotNull(ogr_feature, field_index) == 0:
            # unset or null value: store the null marker matching the field type
            if field_type in (OFTInteger, OFTInteger64, OFTReal):
                if data.dtype.kind in ('b', 'i', 'u'):
                    # a boolean or integer array cannot hold NaN, so the whole
                    # column is converted to float first
                    field_data[j] = field_data[j].astype(np.float64)
                    field_data_view[j] = field_data[j][:]
                    field_data_view[j][i] = np.nan
                else:
                    data[i] = np.nan

            elif field_type in (OFTDate, OFTDateTime) and not datetime_as_string:
                data[i] = np.datetime64('NaT')

            else:
                data[i] = None

            continue

        if field_type == OFTInteger:
            data[i] = OGR_F_GetFieldAsInteger(ogr_feature, field_index)

        elif field_type == OFTInteger64:
            data[i] = OGR_F_GetFieldAsInteger64(ogr_feature, field_index)

        elif field_type == OFTReal:
            data[i] = OGR_F_GetFieldAsDouble(ogr_feature, field_index)

        elif field_type == OFTString:
            data[i] = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)

        elif field_type == OFTBinary:
            bin_value = OGR_F_GetFieldAsBinary(ogr_feature, field_index, &ret_length)
            data[i] = bin_value[:ret_length]

        elif field_type in (OFTDateTime, OFTDate):
            if datetime_as_string:
                # defer datetime parsing to user/ pandas layer
                # Update to OGR_F_GetFieldAsISO8601DateTime when GDAL 3.7+ only
                data[i] = get_string(OGR_F_GetFieldAsString(ogr_feature, field_index), encoding=encoding)
            else:
                success = OGR_F_GetFieldAsDateTimeEx(
                    ogr_feature, field_index, &year, &month, &day, &hour, &minute, &fsecond, &timezone)

                if not success:
                    data[i] = np.datetime64('NaT')

                elif field_type == OFTDate:
                    data[i] = datetime.date(year, month, day).isoformat()

                else:
                    # split seconds into whole and fractional parts;
                    # fsecond has millisecond accuracy
                    fraction, whole = math.modf(fsecond)
                    second = int(whole)
                    microsecond = round(fraction * 1000) * 1000
                    data[i] = datetime.datetime(year, month, day, hour, minute, second, microsecond).isoformat()
912
-
913
-
914
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)   # Deactivate negative indexing.
cdef get_features(
    OGRLayerH ogr_layer,
    object[:,:] fields,
    encoding,
    uint8_t read_geometry,
    uint8_t force_2d,
    int skip_features,
    int num_features,
    uint8_t return_fids,
    bint datetime_as_string
):
    """Read up to num_features features sequentially from a layer.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    fields : 2D object array; column 0 is read as the OGR field index, column 1
        as the OGR field type and column 3 as the numpy dtype of the field
    encoding : encoding used to decode string values
    read_geometry : if nonzero, geometries are read as WKB
    force_2d : if nonzero, 3D geometries are reduced to 2D
    skip_features : number of features to skip before reading
    num_features : number of rows allocated for the output arrays
    return_fids : if nonzero, feature IDs are collected
    datetime_as_string : if True, datetime fields are stored in object arrays

    Returns
    -------
    tuple of (fid_data, geometries, field_data); fid_data / geometries are None
    when not requested, field_data is a list with one array per field

    Raises
    ------
    FeatureError: if GDAL reports an error while fetching a feature
    """
    cdef OGRFeatureH ogr_feature = NULL
    cdef int n_fields
    cdef int i
    cdef int field_index

    # make sure layer is read from beginning
    OGR_L_ResetReading(ogr_layer)

    if skip_features > 0:
        OGR_L_SetNextByIndex(ogr_layer, skip_features)

    fid_data = None
    if return_fids:
        fid_data = np.empty(shape=(num_features), dtype=np.int64)
        fid_view = fid_data[:]

    geometries = None
    if read_geometry:
        geometries = np.empty(shape=(num_features, ), dtype='object')
        geom_view = geometries[:]

    n_fields = fields.shape[0]
    field_indexes = fields[:,0]
    field_ogr_types = fields[:,1]

    # one output array (plus a view on it) per field
    field_data = []
    field_data_view = []
    for field_index in range(n_fields):
        field_dtype = fields[field_index,3]
        if datetime_as_string and field_dtype.startswith("datetime"):
            # datetimes kept as strings need an object array
            field_dtype = "object"

        column = np.empty(shape=(num_features, ), dtype=field_dtype)
        field_data.append(column)
        field_data_view.append(column[:])

    if num_features == 0:
        return fid_data, geometries, field_data

    i = 0
    while True:
        try:
            if num_features > 0 and i == num_features:
                break

            try:
                ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))

            except NullPointerError:
                # No more rows available, so stop reading
                break

            except CPLE_BaseError as exc:
                raise FeatureError(str(exc))

            if i >= num_features:
                raise FeatureError(
                    "GDAL returned more records than expected based on the count of "
                    "records that may meet your combination of filters against this "
                    "dataset.  Please open an issue on Github "
                    "(https://github.com/geopandas/pyogrio/issues) to report encountering "
                    "this error."
                ) from None

            if return_fids:
                fid_view[i] = OGR_F_GetFID(ogr_feature)

            if read_geometry:
                process_geometry(ogr_feature, i, geom_view, force_2d)

            process_fields(
                ogr_feature, i, n_fields, field_data, field_data_view,
                field_indexes, field_ogr_types, encoding, datetime_as_string
            )
            i += 1

        finally:
            # the feature is released on every path out of the iteration
            if ogr_feature != NULL:
                OGR_F_Destroy(ogr_feature)
                ogr_feature = NULL

    # There may be fewer rows available than expected from OGR_L_GetFeatureCount,
    # such as features with bounding boxes that intersect the bbox
    # but do not themselves intersect the bbox.
    # Empty rows are dropped.
    if i < num_features:
        if return_fids:
            fid_data = fid_data[:i]
        if read_geometry:
            geometries = geometries[:i]
        field_data = [column[:i] for column in field_data]

    return fid_data, geometries, field_data
1021
-
1022
-
1023
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)   # Deactivate negative indexing.
cdef get_features_by_fid(
    OGRLayerH ogr_layer,
    int[:] fids,
    object[:,:] fields,
    encoding,
    uint8_t read_geometry,
    uint8_t force_2d,
    bint datetime_as_string
):
    """Read the features with the given feature IDs, in the order given.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    fids : feature IDs to read
    fields : 2D object array; column 0 is read as the OGR field index, column 1
        as the OGR field type and column 3 as the numpy dtype of the field
    encoding : encoding used to decode string values
    read_geometry : if nonzero, geometries are read as WKB
    force_2d : if nonzero, 3D geometries are reduced to 2D
    datetime_as_string : if True, datetime fields are stored in object arrays

    Returns
    -------
    tuple of (geometries, field_data); geometries is None when not requested

    Raises
    ------
    FeatureError: if a feature cannot be read or GDAL reports an error
    """
    cdef OGRFeatureH ogr_feature = NULL
    cdef int n_fields
    cdef int i
    cdef int fid
    cdef int field_index
    cdef int count = len(fids)

    # make sure layer is read from beginning
    OGR_L_ResetReading(ogr_layer)

    geometries = None
    if read_geometry:
        geometries = np.empty(shape=(count, ), dtype='object')
        geom_view = geometries[:]

    n_fields = fields.shape[0]
    field_indexes = fields[:,0]
    field_ogr_types = fields[:,1]

    # one output array (plus a view on it) per field
    field_data = []
    field_data_view = []
    for field_index in range(n_fields):
        field_dtype = fields[field_index,3]
        if datetime_as_string and field_dtype.startswith("datetime"):
            # datetimes kept as strings need an object array
            field_dtype = "object"

        column = np.empty(shape=(count, ), dtype=field_dtype)
        field_data.append(column)
        field_data_view.append(column[:])

    for i in range(count):
        try:
            fid = fids[i]

            try:
                ogr_feature = exc_wrap_pointer(OGR_L_GetFeature(ogr_layer, fid))

            except NullPointerError:
                raise FeatureError(f"Could not read feature with fid {fid}") from None

            except CPLE_BaseError as exc:
                raise FeatureError(str(exc))

            if read_geometry:
                process_geometry(ogr_feature, i, geom_view, force_2d)

            process_fields(
                ogr_feature, i, n_fields, field_data, field_data_view,
                field_indexes, field_ogr_types, encoding, datetime_as_string
            )

        finally:
            # the feature is released on every path out of the iteration
            if ogr_feature != NULL:
                OGR_F_Destroy(ogr_feature)
                ogr_feature = NULL

    return (geometries, field_data)
1091
-
1092
-
1093
@cython.boundscheck(False)  # Deactivate bounds checking
@cython.wraparound(False)   # Deactivate negative indexing.
cdef get_bounds(
    OGRLayerH ogr_layer,
    int skip_features,
    int num_features):
    """Read feature IDs and geometry envelopes sequentially from a layer.

    Parameters
    ----------
    ogr_layer : pointer to open OGR layer
    skip_features : number of features to skip before reading
    num_features : number of columns allocated for the output arrays

    Returns
    -------
    tuple of (fids, bounds); bounds has rows MinX, MinY, MaxX, MaxY and one
    column per feature, filled with NaN for features without geometry

    Raises
    ------
    FeatureError: if GDAL reports an error or returns more features than
        num_features
    """
    cdef OGRFeatureH ogr_feature = NULL
    cdef OGRGeometryH ogr_geometry = NULL
    cdef OGREnvelope ogr_envelope
    cdef int i

    # make sure layer is read from beginning
    OGR_L_ResetReading(ogr_layer)

    if skip_features > 0:
        OGR_L_SetNextByIndex(ogr_layer, skip_features)

    fids = np.empty(shape=(num_features), dtype=np.int64)
    fid_view = fids[:]

    bounds = np.empty(shape=(4, num_features), dtype='float64')
    bounds_view = bounds[:]

    i = 0
    while True:
        try:
            if num_features > 0 and i == num_features:
                break

            try:
                ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer))

            except NullPointerError:
                # No more rows available, so stop reading
                break

            except CPLE_BaseError as exc:
                raise FeatureError(str(exc))

            if i >= num_features:
                raise FeatureError(
                    "Reading more features than indicated by OGR_L_GetFeatureCount is not supported"
                ) from None

            fid_view[i] = OGR_F_GetFID(ogr_feature)

            ogr_geometry = OGR_F_GetGeometryRef(ogr_feature)

            if ogr_geometry != NULL:
                OGR_G_GetEnvelope(ogr_geometry, &ogr_envelope)
                # fill the whole column at once, in MinX, MinY, MaxX, MaxY order
                bounds_view[:, i] = (
                    ogr_envelope.MinX,
                    ogr_envelope.MinY,
                    ogr_envelope.MaxX,
                    ogr_envelope.MaxY,
                )
            else:
                bounds_view[:, i] = np.nan

            i += 1

        finally:
            # the feature is released on every path out of the iteration
            if ogr_feature != NULL:
                OGR_F_Destroy(ogr_feature)
                ogr_feature = NULL

    # Less rows read than anticipated, so drop empty rows
    if i < num_features:
        fids = fids[:i]
        bounds = bounds[:, :i]

    return fids, bounds
1164
-
1165
-
1166
def ogr_read(
    object path_or_buffer,
    object dataset_kwargs,
    object layer=None,
    object encoding=None,
    int read_geometry=True,
    int force_2d=False,
    object columns=None,
    int skip_features=0,
    int max_features=0,
    object where=None,
    tuple bbox=None,
    object mask=None,
    object fids=None,
    str sql=None,
    str sql_dialect=None,
    int return_fids=False,
    bint datetime_as_string=False
):
    """Read features of a layer (or SQL result set) into numpy arrays.

    Parameters
    ----------
    path_or_buffer : path of the data source, or bytes that are first copied
        to an in-memory file
    dataset_kwargs : dataset open options
    layer : layer to read; the default layer is used when None
    encoding : encoding of the data source; detected when not provided
    read_geometry : if nonzero, read geometries
    force_2d : if nonzero, reduce 3D geometries to 2D
    columns : names of the fields to read; all fields when None
    skip_features : number of features to skip from the start
    max_features : maximum number of features to read; 0 means no limit
    where : attribute filter expression
    bbox : tuple of xmin, ymin, xmax, ymax used as spatial filter
    mask : WKB of a geometry used as spatial filter
    fids : feature IDs to read; cannot be combined with the filters above
    sql : SQL statement to execute; cannot be combined with layer
    sql_dialect : SQL dialect passed on when executing sql
    return_fids : if nonzero, return feature IDs
    datetime_as_string : if True, datetime values are returned as strings

    Returns
    -------
    tuple of (meta, fid_data, geometries, field_data)

    Raises
    ------
    ValueError: for invalid or conflicting parameters
    """
    cdef char **dataset_options = NULL
    cdef const char *field_c = NULL
    cdef char **fields_c = NULL
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef const char *prev_shape_encoding = NULL
    cdef bint override_shape_encoding = False

    if fids is not None:
        if where is not None or bbox is not None or mask is not None or sql is not None or skip_features or max_features:
            raise ValueError(
                "cannot set both 'fids' and any of 'where', 'bbox', 'mask', "
                "'sql', 'skip_features' or 'max_features'"
            )
        fids = np.asarray(fids, dtype=np.intc)

    if sql is not None and layer is not None:
        raise ValueError("'sql' paramater cannot be combined with 'layer'")

    if not (read_geometry or return_fids or columns is None or len(columns) > 0):
        raise ValueError(
            "at least one of read_geometry or return_fids must be True or columns must "
            "be None or non-empty"
        )

    if bbox and mask:
        raise ValueError("cannot set both 'bbox' and 'mask'")

    if skip_features < 0:
        raise ValueError("'skip_features' must be >= 0")

    if max_features < 0:
        raise ValueError("'max_features' must be >= 0")

    # bound before the try block so the cleanup below never hits an unbound
    # name (and masks the real error) if creating the in-memory file fails
    path = None

    try:
        path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem_buffer(path_or_buffer) else path_or_buffer

        if encoding:
            # for shapefiles, SHAPE_ENCODING must be set before opening the file
            # to prevent automatic decoding to UTF-8 by GDAL, so we save previous
            # SHAPE_ENCODING so that it can be restored later
            # (we do this for all data sources where encoding is set because
            # we don't know the driver until after it is opened, which is too late)
            override_shape_encoding = True
            prev_shape_encoding = override_threadlocal_config_option("SHAPE_ENCODING", encoding)

        dataset_options = dict_to_options(dataset_kwargs)
        ogr_dataset = ogr_open(path.encode('UTF-8'), 0, dataset_options)

        if sql is None:
            if layer is None:
                layer = get_default_layer(ogr_dataset)
            ogr_layer = get_ogr_layer(ogr_dataset, layer)
        else:
            ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect)

        crs = get_crs(ogr_layer)

        # Encoding is derived from the user, from the dataset capabilities / type,
        # or from the system locale
        if encoding:
            if get_driver(ogr_dataset) == "ESRI Shapefile":
                # NOTE: SHAPE_ENCODING is a configuration option whereas ENCODING is the dataset open option
                if "ENCODING" in dataset_kwargs:
                    raise ValueError('cannot provide both encoding parameter and "ENCODING" option; use encoding parameter to specify correct encoding for data source')

                # Because SHAPE_ENCODING is set above, GDAL will automatically
                # decode shapefiles to UTF-8; ignore any encoding set by user
                encoding = "UTF-8"

        else:
            encoding = detect_encoding(ogr_dataset, ogr_layer)

        fields = get_fields(ogr_layer, encoding)

        ignored_fields = []
        if columns is not None:
            # identify ignored fields first
            ignored_fields = list(set(fields[:,2]) - set(columns))

            # Fields are matched exactly by name, duplicates are dropped.
            # Find index of each field into fields
            idx = np.intersect1d(fields[:,2], columns, return_indices=True)[1]
            fields = fields[idx, :]

        if not read_geometry:
            ignored_fields.append("OGR_GEOMETRY")

        # Instruct GDAL to ignore reading fields not
        # included in output columns for faster I/O
        if ignored_fields:
            for field in ignored_fields:
                field_b = field.encode("utf-8")
                field_c = field_b
                fields_c = CSLAddString(fields_c, field_c)

            OGR_L_SetIgnoredFields(ogr_layer, <const char**>fields_c)

        geometry_type = get_geometry_type(ogr_layer)

        if fids is not None:
            geometries, field_data = get_features_by_fid(
                ogr_layer,
                fids,
                fields,
                encoding,
                read_geometry=read_geometry and geometry_type is not None,
                force_2d=force_2d,
                datetime_as_string=datetime_as_string
            )

            # bypass reading fids since these should match fids used for read
            if return_fids:
                fid_data = fids.astype(np.int64)
            else:
                fid_data = None
        else:
            # Apply the attribute filter
            if where is not None and where != "":
                apply_where_filter(ogr_layer, where)

            # Apply the spatial filter
            if bbox is not None:
                apply_bbox_filter(ogr_layer, bbox)

            elif mask is not None:
                apply_geometry_filter(ogr_layer, mask)

            # Limit feature range to available range
            skip_features, num_features = validate_feature_range(
                ogr_layer, skip_features, max_features
            )

            fid_data, geometries, field_data = get_features(
                ogr_layer,
                fields,
                encoding,
                read_geometry=read_geometry and geometry_type is not None,
                force_2d=force_2d,
                skip_features=skip_features,
                num_features=num_features,
                return_fids=return_fids,
                datetime_as_string=datetime_as_string
            )

        meta = {
            'crs': crs,
            'encoding': encoding,
            'fields': fields[:,2],  # return only names
            'dtypes': fields[:,3],
            'geometry_type': geometry_type,
        }

    finally:
        # the ignored-fields string list is owned by this function and has to
        # be released here, otherwise it leaks on every call using it
        if fields_c != NULL:
            CSLDestroy(fields_c)
            fields_c = NULL

        if dataset_options != NULL:
            CSLDestroy(dataset_options)
            dataset_options = NULL

        if ogr_dataset != NULL:
            if sql is not None:
                GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer)

            GDALClose(ogr_dataset)
            ogr_dataset = NULL

        # reset SHAPE_ENCODING config parameter if temporarily set above
        if override_shape_encoding:
            CPLSetThreadLocalConfigOption("SHAPE_ENCODING", prev_shape_encoding)

            if prev_shape_encoding != NULL:
                CPLFree(<void*>prev_shape_encoding)
                prev_shape_encoding = NULL

        # only remove the in-memory file if it was actually created
        if is_vsimem_buffer(path_or_buffer) and path is not None:
            delete_vsimem_file(path)

    return (
        meta,
        fid_data,
        geometries,
        field_data
    )


cdef bint is_vsimem_buffer(object path_or_buffer):
    """Return True if the input is bytes that must be read via an in-memory file."""
    return isinstance(path_or_buffer, bytes)
1374
-
1375
-
1376
cdef void pycapsule_array_stream_deleter(object stream_capsule) noexcept:
    """Capsule destructor: release the wrapped ArrowArrayStream and free it."""
    cdef ArrowArrayStream* c_stream
    c_stream = <ArrowArrayStream*>PyCapsule_GetPointer(stream_capsule, 'arrow_array_stream')

    # Do not invoke the deleter on a used/moved capsule
    if c_stream.release != NULL:
        c_stream.release(c_stream)

    # the struct itself was allocated with malloc, so it is freed here
    free(c_stream)
1385
-
1386
-
1387
cdef object alloc_c_stream(ArrowArrayStream** c_stream):
    """Allocate an ArrowArrayStream struct and wrap it in a PyCapsule.

    Parameters
    ----------
    c_stream : output pointer; set to the newly allocated struct

    Returns
    -------
    PyCapsule named 'arrow_array_stream' whose destructor is
    pycapsule_array_stream_deleter

    Raises
    ------
    MemoryError: if the struct could not be allocated
    """
    c_stream[0] = <ArrowArrayStream*> malloc(sizeof(ArrowArrayStream))
    if c_stream[0] == NULL:
        # fail cleanly instead of writing through a NULL pointer below
        raise MemoryError("Could not allocate ArrowArrayStream")

    # Ensure the capsule destructor doesn't call a random release pointer
    c_stream[0].release = NULL
    return PyCapsule_New(c_stream[0], 'arrow_array_stream', &pycapsule_array_stream_deleter)
1392
-
1393
-
1394
class _ArrowStream:
    """Minimal object handing out a stored capsule via ``__arrow_c_stream__``."""

    def __init__(self, capsule):
        # capsule is stored as-is and returned unchanged on request
        self._capsule = capsule

    def __arrow_c_stream__(self, requested_schema=None):
        """Return the stored capsule; a requested schema is not supported."""
        if requested_schema is None:
            return self._capsule

        raise NotImplementedError("requested_schema is not supported")
1402
-
1403
-
1404
@contextlib.contextmanager
def ogr_open_arrow(
    object path_or_buffer,
    dataset_kwargs,
    object layer=None,
    object encoding=None,
    int read_geometry=True,
    int force_2d=False,
    object columns=None,
    int skip_features=0,
    int max_features=0,
    object where=None,
    tuple bbox=None,
    object mask=None,
    object fids=None,
    str sql=None,
    str sql_dialect=None,
    int return_fids=False,
    int batch_size=0,
    use_pyarrow=False,
):
    """Context manager that opens a layer as an Arrow array stream.

    Yields a tuple of (meta, reader). reader is a pyarrow RecordBatchStreamReader
    when use_pyarrow is true, otherwise an object implementing
    ``__arrow_c_stream__``. The stream has to be consumed inside the context,
    because the dataset is closed when the context exits.

    Parameters
    ----------
    path_or_buffer : path of the data source, or bytes that are first copied
        to an in-memory file
    dataset_kwargs : dataset open options
    layer : layer to read; the default layer is used when None
    encoding : encoding of the data source; only UTF-8 is accepted, except
        for the "ESRI Shapefile" driver
    read_geometry : if nonzero, read geometries
    force_2d : not supported for Arrow; must be falsy
    columns : names of the fields to read; all fields when None
    skip_features : number of features to skip from the start
    max_features : not supported for Arrow; must be 0
    where : attribute filter expression
    bbox : tuple of xmin, ymin, xmax, ymax used as spatial filter
    mask : WKB of a geometry used as spatial filter
    fids : feature IDs to read; applied through a where clause
    sql : SQL statement to execute; cannot be combined with layer
    sql_dialect : SQL dialect passed on when executing sql
    return_fids : if nonzero, include feature IDs in the stream
    batch_size : maximum number of features per batch when > 0
    use_pyarrow : if true, wrap the stream in a pyarrow reader

    Raises
    ------
    RuntimeError: if Arrow is not supported by the GDAL version compiled
        against, or the Arrow stream cannot be opened
    ValueError: for invalid, unsupported or conflicting parameters
    """
    cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
    cdef char **dataset_options = NULL
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef char **fields_c = NULL
    cdef const char *field_c = NULL
    cdef char **options = NULL
    cdef const char *prev_shape_encoding = NULL
    cdef bint override_shape_encoding = False
    cdef ArrowArrayStream* stream

    IF CTE_GDAL_VERSION < (3, 6, 0):
        raise RuntimeError("Need GDAL>=3.6 for Arrow support")

    if force_2d:
        raise ValueError("forcing 2D is not supported for Arrow")

    if fids is not None:
        if where is not None or bbox is not None or mask is not None or sql is not None or skip_features or max_features:
            raise ValueError(
                "cannot set both 'fids' and any of 'where', 'bbox', 'mask', "
                "'sql', 'skip_features', or 'max_features'"
            )

    IF CTE_GDAL_VERSION < (3, 8, 0):
        if skip_features:
            raise ValueError(
                "specifying 'skip_features' is not supported for Arrow for GDAL<3.8.0"
            )

    if skip_features < 0:
        raise ValueError("'skip_features' must be >= 0")

    if max_features:
        raise ValueError(
            "specifying 'max_features' is not supported for Arrow"
        )

    if sql is not None and layer is not None:
        raise ValueError("'sql' paramater cannot be combined with 'layer'")

    if not (read_geometry or return_fids or columns is None or len(columns) > 0):
        raise ValueError(
            "at least one of read_geometry or return_fids must be True or columns must "
            "be None or non-empty"
        )

    if bbox and mask:
        raise ValueError("cannot set both 'bbox' and 'mask'")

    reader = None
    # bound before the try block so the cleanup below never hits an unbound
    # name (and masks the real error) if creating the in-memory file fails
    path = None

    try:
        path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer

        if encoding:
            # SHAPE_ENCODING has to be set before the data source is opened;
            # the previous value is kept so it can be restored afterwards
            override_shape_encoding = True
            prev_shape_encoding = override_threadlocal_config_option("SHAPE_ENCODING", encoding)

        dataset_options = dict_to_options(dataset_kwargs)
        ogr_dataset = ogr_open(path.encode('UTF-8'), 0, dataset_options)

        if sql is None:
            if layer is None:
                layer = get_default_layer(ogr_dataset)
            ogr_layer = get_ogr_layer(ogr_dataset, layer)
        else:
            ogr_layer = execute_sql(ogr_dataset, sql, sql_dialect)

        crs = get_crs(ogr_layer)

        # Encoding is derived from the user, from the dataset capabilities / type,
        # or from the system locale
        if encoding:
            if get_driver(ogr_dataset) == "ESRI Shapefile":
                if "ENCODING" in dataset_kwargs:
                    raise ValueError('cannot provide both encoding parameter and "ENCODING" option; use encoding parameter to specify correct encoding for data source')

                encoding = "UTF-8"

            elif encoding.replace('-','').upper() != 'UTF8':
                raise ValueError("non-UTF-8 encoding is not supported for Arrow; use the non-Arrow interface instead")

        else:
            encoding = detect_encoding(ogr_dataset, ogr_layer)

        fields = get_fields(ogr_layer, encoding, use_arrow=True)

        ignored_fields = []
        if columns is not None:
            # Fields are matched exactly by name, duplicates are dropped.
            ignored_fields = list(set(fields[:,2]) - set(columns))
        if not read_geometry:
            ignored_fields.append("OGR_GEOMETRY")

        # raise error if schema has bool values for FGB / GPKG and GDAL <3.8.3
        # due to https://github.com/OSGeo/gdal/issues/8998
        IF CTE_GDAL_VERSION < (3, 8, 3):

            driver = get_driver(ogr_dataset)
            if driver in {'FlatGeobuf', 'GPKG'}:
                ignored = set(ignored_fields)
                for f in fields:
                    if f[2] not in ignored and f[3] == 'bool':
                        raise RuntimeError(
                            "GDAL < 3.8.3 does not correctly read boolean data values using the "
                            "Arrow API.  Do not use read_arrow() / use_arrow=True for this dataset."
                        )

        geometry_type = get_geometry_type(ogr_layer)

        geometry_name = get_string(OGR_L_GetGeometryColumn(ogr_layer))

        fid_column = get_string(OGR_L_GetFIDColumn(ogr_layer))
        fid_column_where = fid_column
        # OGR_L_GetFIDColumn returns the column name if it is a custom column,
        # or "" if not. For arrow, the default column name used to return the FID data
        # read is "OGC_FID". When accessing the underlying datasource like when using a
        # where clause, the default column name is "FID".
        if fid_column == "":
            fid_column = "OGC_FID"
            fid_column_where = "FID"

        # Use fids list to create a where clause, as arrow doesn't support direct fid
        # filtering.
        if fids is not None:
            IF CTE_GDAL_VERSION < (3, 8, 0):
                driver = get_driver(ogr_dataset)
                if driver not in {"GPKG", "GeoJSON"}:
                    warnings.warn(
                        "Using 'fids' and 'use_arrow=True' with GDAL < 3.8 can be slow "
                        "for some drivers. Upgrading GDAL or using 'use_arrow=False' "
                        "can avoid this.",
                        stacklevel=2,
                    )

            fids_str = ",".join([str(fid) for fid in fids])
            where = f"{fid_column_where} IN ({fids_str})"

        # Apply the attribute filter
        if where is not None and where != "":
            try:
                apply_where_filter(ogr_layer, where)
            except ValueError as ex:
                if fids is not None and str(ex).startswith("Invalid SQL query"):
                    # If fids is not None, the where being applied is the one formatted
                    # above.
                    raise ValueError(
                        f"error applying filter for {len(fids)} fids; max. number for "
                        f"drivers with default SQL dialect 'OGRSQL' is 4997"
                    ) from ex

                raise

        # Apply the spatial filter
        if bbox is not None:
            apply_bbox_filter(ogr_layer, bbox)

        elif mask is not None:
            apply_geometry_filter(ogr_layer, mask)

        # Limit to specified columns
        if ignored_fields:
            for field in ignored_fields:
                field_b = field.encode("utf-8")
                field_c = field_b
                fields_c = CSLAddString(fields_c, field_c)

            OGR_L_SetIgnoredFields(ogr_layer, <const char**>fields_c)

        if not return_fids:
            options = CSLSetNameValue(options, "INCLUDE_FID", "NO")

        if batch_size > 0:
            options = CSLSetNameValue(
                options,
                "MAX_FEATURES_IN_BATCH",
                str(batch_size).encode('UTF-8')
            )

        # Default to geoarrow metadata encoding
        IF CTE_GDAL_VERSION >= (3, 8, 0):
            options = CSLSetNameValue(
                options,
                "GEOMETRY_METADATA_ENCODING",
                "GEOARROW".encode('UTF-8')
            )

        # make sure layer is read from beginning
        OGR_L_ResetReading(ogr_layer)

        # allocate the stream struct and wrap in capsule to ensure clean-up on error
        capsule = alloc_c_stream(&stream)

        if not OGR_L_GetArrowStream(ogr_layer, stream, options):
            raise RuntimeError("Failed to open ArrowArrayStream from Layer")

        if skip_features:
            # only supported for GDAL >= 3.8.0; have to do this after getting
            # the Arrow stream
            OGR_L_SetNextByIndex(ogr_layer, skip_features)

        if use_pyarrow:
            import pyarrow as pa

            reader = pa.RecordBatchStreamReader._import_from_c(<uintptr_t> stream)
        else:
            reader = _ArrowStream(capsule)

        meta = {
            'crs': crs,
            'encoding': encoding,
            'fields': fields[:,2],  # return only names
            'geometry_type': geometry_type,
            'geometry_name': geometry_name,
            'fid_column': fid_column,
        }

        # stream has to be consumed before the Dataset is closed
        yield meta, reader

    finally:
        if use_pyarrow and reader is not None:
            # Mark reader as closed to prevent reading batches
            reader.close()

        # `stream` will be freed through `capsule` destructor

        CSLDestroy(options)
        if fields_c != NULL:
            CSLDestroy(fields_c)
            fields_c = NULL

        if dataset_options != NULL:
            CSLDestroy(dataset_options)
            dataset_options = NULL

        if ogr_dataset != NULL:
            if sql is not None:
                GDALDatasetReleaseResultSet(ogr_dataset, ogr_layer)

            GDALClose(ogr_dataset)
            ogr_dataset = NULL

        # reset SHAPE_ENCODING config parameter if temporarily set above
        if override_shape_encoding:
            CPLSetThreadLocalConfigOption("SHAPE_ENCODING", prev_shape_encoding)

            if prev_shape_encoding != NULL:
                CPLFree(<void*>prev_shape_encoding)
                prev_shape_encoding = NULL

        # only remove the in-memory file if it was actually created
        if is_vsimem and path is not None:
            delete_vsimem_file(path)
1684
-
1685
-
1686
def ogr_read_bounds(
    object path_or_buffer,
    object layer=None,
    object encoding=None,
    int read_geometry=True,
    int force_2d=False,
    object columns=None,
    int skip_features=0,
    int max_features=0,
    object where=None,
    tuple bbox=None,
    object mask=None):
    """Read the feature IDs and geometry bounds of the features of a layer.

    Parameters
    ----------
    path_or_buffer : path of the data source, or bytes that are first copied
        to an in-memory file
    layer : layer to read; the default layer is used when None
    encoding, read_geometry, force_2d, columns : accepted but not used by
        this function
    skip_features : number of features to skip from the start
    max_features : maximum number of features to read; 0 means no limit
    where : attribute filter expression
    bbox : tuple of xmin, ymin, xmax, ymax used as spatial filter
    mask : WKB of a geometry used as spatial filter

    Returns
    -------
    the (fids, bounds) result of get_bounds

    Raises
    ------
    ValueError: if both bbox and mask are set, or skip_features /
        max_features is negative
    """
    cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL

    if bbox and mask:
        raise ValueError("cannot set both 'bbox' and 'mask'")

    if skip_features < 0:
        raise ValueError("'skip_features' must be >= 0")

    if max_features < 0:
        raise ValueError("'max_features' must be >= 0")

    # bound before the try block so the cleanup below never hits an unbound
    # name (and masks the real error) if creating the in-memory file fails
    path = None

    try:
        path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer
        ogr_dataset = ogr_open(path.encode('UTF-8'), 0, NULL)

        if layer is None:
            layer = get_default_layer(ogr_dataset)

        ogr_layer = get_ogr_layer(ogr_dataset, layer)

        # Apply the attribute filter
        if where is not None and where != "":
            apply_where_filter(ogr_layer, where)

        # Apply the spatial filter
        if bbox is not None:
            apply_bbox_filter(ogr_layer, bbox)

        elif mask is not None:
            apply_geometry_filter(ogr_layer, mask)

        # Limit feature range to available range
        skip_features, num_features = validate_feature_range(ogr_layer, skip_features, max_features)

        bounds = get_bounds(ogr_layer, skip_features, num_features)

    finally:
        if ogr_dataset != NULL:
            GDALClose(ogr_dataset)
            ogr_dataset = NULL

        # only remove the in-memory file if it was actually created
        if is_vsimem and path is not None:
            delete_vsimem_file(path)

    return bounds
1751
-
1752
-
1753
def ogr_read_info(
    object path_or_buffer,
    dataset_kwargs,
    object layer=None,
    object encoding=None,
    int force_feature_count=False,
    int force_total_bounds=False):
    """Collect descriptive metadata about one layer of a dataset.

    Parameters
    ----------
    path_or_buffer : str or bytes
        Dataset path. ``bytes`` input is passed through
        ``read_buffer_to_vsimem`` and the resulting path is removed again
        with ``delete_vsimem_file`` once reading is done.
    dataset_kwargs : dict
        Dataset open options, converted with ``dict_to_options``.
    layer : optional
        Layer to describe; when None the result of ``get_default_layer`` is
        used.
    encoding : str, optional
        When set, the thread-local ``SHAPE_ENCODING`` config option is
        overridden for the duration of the call and restored afterwards.
        When not set, the encoding comes from ``detect_encoding``.
    force_feature_count : int
        Forwarded to ``get_feature_count``.
    force_total_bounds : int
        Forwarded to ``get_total_bounds``.

    Returns
    -------
    dict
        Keys: ``layer_name``, ``crs``, ``encoding``, ``fields``, ``dtypes``,
        ``fid_column``, ``geometry_name``, ``geometry_type``, ``features``,
        ``total_bounds``, ``driver``, ``capabilities``, ``layer_metadata``
        and ``dataset_metadata``.
    """
    cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
    cdef char **dataset_options = NULL
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef const char *prev_shape_encoding = NULL
    cdef bint override_shape_encoding = False

    # Bound up front so the cleanup in ``finally`` never touches an unassigned
    # name when copying the buffer to /vsimem/ fails.
    path = None

    try:
        path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer

        if encoding:
            override_shape_encoding = True
            prev_shape_encoding = override_threadlocal_config_option("SHAPE_ENCODING", encoding)

        dataset_options = dict_to_options(dataset_kwargs)
        ogr_dataset = ogr_open(path.encode('UTF-8'), 0, dataset_options)

        if layer is None:
            layer = get_default_layer(ogr_dataset)
        ogr_layer = get_ogr_layer(ogr_dataset, layer)

        driver = get_driver(ogr_dataset)

        # NOTE(review): for shapefiles with a user encoding, values are
        # presumably already recoded by GDAL through the SHAPE_ENCODING
        # override above, hence UTF-8 from here on -- confirm.
        if encoding and driver == "ESRI Shapefile":
            encoding = "UTF-8"
        else:
            encoding = encoding or detect_encoding(ogr_dataset, ogr_layer)

        fields = get_fields(ogr_layer, encoding)

        meta = {
            "layer_name": get_string(OGR_L_GetName(ogr_layer)),
            "crs": get_crs(ogr_layer),
            "encoding": encoding,
            "fields": fields[:,2], # return only names
            "dtypes": fields[:,3],
            "fid_column": get_string(OGR_L_GetFIDColumn(ogr_layer)),
            "geometry_name": get_string(OGR_L_GetGeometryColumn(ogr_layer)),
            "geometry_type": get_geometry_type(ogr_layer),
            "features": get_feature_count(ogr_layer, force_feature_count),
            "total_bounds": get_total_bounds(ogr_layer, force_total_bounds),
            "driver": driver,
            "capabilities": {
                "random_read": OGR_L_TestCapability(ogr_layer, OLCRandomRead) == 1,
                "fast_set_next_by_index": OGR_L_TestCapability(ogr_layer, OLCFastSetNextByIndex) == 1,
                "fast_spatial_filter": OGR_L_TestCapability(ogr_layer, OLCFastSpatialFilter) == 1,
                "fast_feature_count": OGR_L_TestCapability(ogr_layer, OLCFastFeatureCount) == 1,
                "fast_total_bounds": OGR_L_TestCapability(ogr_layer, OLCFastGetExtent) == 1,
            },
            "layer_metadata": get_metadata(ogr_layer),
            "dataset_metadata": get_metadata(ogr_dataset),
        }

    finally:
        if dataset_options != NULL:
            CSLDestroy(dataset_options)
            dataset_options = NULL

        if ogr_dataset != NULL:
            GDALClose(ogr_dataset)
            ogr_dataset = NULL

        # reset SHAPE_ENCODING config parameter if temporarily set above
        if override_shape_encoding:
            CPLSetThreadLocalConfigOption("SHAPE_ENCODING", prev_shape_encoding)

            if prev_shape_encoding != NULL:
                CPLFree(<void*>prev_shape_encoding)

        # only remove the temporary file if it was actually created
        if is_vsimem and path is not None:
            delete_vsimem_file(path)

    return meta
1833
-
1834
-
1835
def ogr_list_layers(object path_or_buffer):
    """List the layers of a dataset.

    Parameters
    ----------
    path_or_buffer : str or bytes
        Dataset path. ``bytes`` input is passed through
        ``read_buffer_to_vsimem`` and the resulting path is removed again
        with ``delete_vsimem_file`` once reading is done.

    Returns
    -------
    The value returned by ``get_layer_names`` for the opened dataset.
    """
    cdef bint is_vsimem = isinstance(path_or_buffer, bytes)
    cdef OGRDataSourceH ogr_dataset = NULL

    # Bound up front so the cleanup in ``finally`` never touches an unassigned
    # name when copying the buffer to /vsimem/ fails.
    path = None

    try:
        path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer
        ogr_dataset = ogr_open(path.encode('UTF-8'), 0, NULL)
        layers = get_layer_names(ogr_dataset)

    finally:
        if ogr_dataset != NULL:
            GDALClose(ogr_dataset)
            ogr_dataset = NULL

        # only remove the temporary file if it was actually created
        if is_vsimem and path is not None:
            delete_vsimem_file(path)

    return layers
1854
-
1855
-
1856
cdef str get_default_layer(OGRDataSourceH ogr_dataset):
    """Return the name of the layer that is read when none is requested.

    The first layer reported by ``get_layer_names`` is the default. When the
    dataset holds more than one layer a warning listing all of them is
    emitted. The caller remains responsible for closing the dataset.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset

    Returns
    -------
    str
        the name of the default layer to be read.

    """
    layer_info = get_layer_names(ogr_dataset)
    default_name = layer_info[0][0]

    # single-layer datasets need no warning
    if len(layer_info) <= 1:
        return default_name

    dataset_name = os.path.basename(get_string(OGR_DS_GetName(ogr_dataset)))
    quoted_names = [f"'{name}'" for name in layer_info[1:, 0]]
    other_layer_names = ', '.join(quoted_names)

    warnings.warn(
        f"More than one layer found in '{dataset_name}': '{default_name}' "
        f"(default), {other_layer_names}. Specify layer parameter to avoid this "
        "warning.",
        stacklevel=2,
    )

    return default_name
1886
-
1887
-
1888
cdef get_layer_names(OGRDataSourceH ogr_dataset):
    """List the name and geometry type of every layer in the dataset.

    The caller is responsible for closing the dataset.

    Parameters
    ----------
    ogr_dataset : pointer to open OGR dataset

    Returns
    -------
    ndarray of shape (n, 2), dtype object
        one row per layer: column 0 holds the layer name, column 1 the
        value returned by ``get_geometry_type`` for that layer

    """
    cdef OGRLayerH ogr_layer = NULL

    num_layers = GDALDatasetGetLayerCount(ogr_dataset)
    layers = np.empty(shape=(num_layers, 2), dtype=object)

    for idx in range(num_layers):
        ogr_layer = GDALDatasetGetLayer(ogr_dataset, idx)
        layers[idx, 0] = get_string(OGR_L_GetName(ogr_layer))
        layers[idx, 1] = get_geometry_type(ogr_layer)

    return layers
1916
-
1917
-
1918
- # NOTE: all modes are write-only
1919
- # some data sources have multiple layers
1920
cdef void * ogr_create(const char* path_c, const char* driver_c, char** options) except NULL:
    """Create a new dataset at ``path_c`` using the driver named ``driver_c``.

    Parameters
    ----------
    path_c : UTF-8 encoded path of the dataset to create
    driver_c : UTF-8 encoded GDAL driver name
    options : dataset creation options (may be NULL)

    Returns
    -------
    pointer to the newly created dataset; the caller must close it.

    Raises
    ------
    DataSourceError
        if the driver cannot be obtained or the dataset cannot be created.
    """
    cdef void *gdal_driver = NULL
    cdef OGRDataSourceH new_dataset = NULL

    # Step 1: look up the driver
    try:
        gdal_driver = exc_wrap_pointer(GDALGetDriverByName(driver_c))

    except NullPointerError:
        raise DataSourceError(f"Could not obtain driver: {driver_c.decode('utf-8')} (check that it was installed correctly into GDAL)")

    except CPLE_BaseError as driver_exc:
        raise DataSourceError(str(driver_exc))

    # Step 2: create the dataset with that driver
    try:
        new_dataset = exc_wrap_pointer(
            GDALCreate(gdal_driver, path_c, 0, 0, 0, GDT_Unknown, options)
        )

    except NullPointerError:
        raise DataSourceError(f"Failed to create dataset with driver: {path_c.decode('utf-8')} {driver_c.decode('utf-8')}") from None

    except CPLE_NotSupportedError:
        raise DataSourceError(f"Driver {driver_c.decode('utf-8')} does not support write functionality") from None

    except CPLE_BaseError as create_exc:
        raise DataSourceError(str(create_exc))

    return new_dataset
1948
-
1949
-
1950
cdef void * create_crs(str crs) except NULL:
    """Build an OGR spatial reference from a user-supplied CRS string.

    Parameters
    ----------
    crs : str
        CRS definition handed to ``OSRSetFromUserInput``.

    Returns
    -------
    pointer to a new spatial reference; the caller must release it with
    ``OSRRelease``.

    Raises
    ------
    CRSError
        if the CRS string cannot be applied to the spatial reference. The
        partially initialised spatial reference is released before raising.
    """
    cdef char *crs_c = NULL
    cdef void *ogr_crs = NULL

    # keep the encoded bytes object alive for as long as crs_c is used
    crs_b = crs.encode('UTF-8')
    crs_c = crs_b

    try:
        ogr_crs = exc_wrap_pointer(OSRNewSpatialReference(NULL))
        err = OSRSetFromUserInput(ogr_crs, crs_c)
        if err:
            raise CRSError("Could not set CRS: {}".format(crs_c.decode('UTF-8'))) from None

    except CPLE_BaseError as exc:
        # the allocation itself may have failed, in which case there is
        # nothing to release
        if ogr_crs != NULL:
            OSRRelease(ogr_crs)
            ogr_crs = NULL
        raise CRSError("Could not set CRS: {}".format(exc))

    except CRSError:
        # raised just above when OSRSetFromUserInput reported an error: the
        # spatial reference was allocated and must not leak
        if ogr_crs != NULL:
            OSRRelease(ogr_crs)
            ogr_crs = NULL
        raise

    return ogr_crs
1968
-
1969
-
1970
cdef infer_field_types(list dtypes):
    """Map numpy dtypes to OGR field type information.

    Parameters
    ----------
    dtypes : list of numpy dtypes, one per field

    Returns
    -------
    ndarray of shape (len(dtypes), 4), dtype int
        per field: column 0 is the OGR field type, column 1 the OGR field
        subtype and column 2 the width; column 3 is left at zero here (the
        caller unpacks it as the precision).

    Raises
    ------
    NotImplementedError
        if a dtype cannot be mapped to an OGR field type.
    """
    cdef int field_type = 0
    cdef int field_subtype = 0

    field_types = np.zeros(shape=(len(dtypes), 4), dtype=int)

    for i, dtype in enumerate(dtypes):
        if dtype.name in DTYPE_OGR_FIELD_TYPES:
            field_type, field_subtype = DTYPE_OGR_FIELD_TYPES[dtype.name]
            field_types[i, 0] = field_type
            field_types[i, 1] = field_subtype
            continue

        if dtype == np.dtype('O'):
            # Object type is ambiguous: could be a string or binary data
            # TODO: handle binary or other types
            # for now fall back to string (same as Geopandas); no width is set
            field_types[i, 0] = OFTString
            continue

        if dtype.type is np.str_ or dtype.type is np.bytes_:
            field_types[i, 0] = OFTString
            # NOTE(review): dividing by 4 matches the 4 bytes per character
            # of np.str_; np.bytes_ takes the same path -- confirm intended.
            field_types[i, 2] = int(dtype.itemsize // 4)
            continue

        if dtype.name.startswith("datetime64"):
            # datetime dtype precision is specified with eg. [ms], but this
            # isn't useful when writing to gdal.
            field_type, field_subtype = DTYPE_OGR_FIELD_TYPES["datetime64"]
            field_types[i, 0] = field_type
            field_types[i, 1] = field_subtype
            continue

        raise NotImplementedError(f"field type is not supported {dtype.name} (field index: {i})")

    return field_types
2013
-
2014
-
2015
cdef create_ogr_dataset_layer(
    str path,
    bint is_vsi,
    str layer,
    str driver,
    str crs,
    str geometry_type,
    str encoding,
    object dataset_kwargs,
    object layer_kwargs,
    bint append,
    dataset_metadata,
    layer_metadata,
    OGRDataSourceH* ogr_dataset_out,
    OGRLayerH* ogr_layer_out,
):
    """
    Construct the OGRDataSource and OGRLayer objects based on input
    path and layer.

    If the file already exists, will open the existing dataset and overwrite
    or append the layer (depending on `append`), otherwise will create a new
    dataset.

    Fills in the `ogr_dataset_out` and `ogr_layer_out` pointers passed as
    parameter with initialized objects (or raises an error if it fails to do
    so). It is the responsibility of the caller to clean up those objects
    after use. When this function raises, the out pointers are left untouched
    and any dataset opened or created here has already been closed.

    Returns whether a new layer was created or not (when the layer was created,
    the caller still needs to set up the layer definition, i.e. create the
    fields).

    Parameters
    ----------
    path : str
        Path of the dataset to open or create.
    is_vsi : bool
        If True the path is treated as not yet existing and the dataset is
        always created from scratch.
    layer : str
        Layer name; when empty it is derived from the file name in `path`.
    driver : str
        GDAL driver name.
    crs : str
        CRS for a newly created layer, or None.
    geometry_type : str
        Geometry type for a newly created layer.
    encoding : str
        Only used if `driver` is "ESRI Shapefile". If not None, it overrules the default
        shapefile encoding, which is "UTF-8" in pyogrio.
    dataset_kwargs : dict
        Dataset creation options.
    layer_kwargs : dict
        Layer creation options (str keys and values).
    append : bool
        Append to an existing layer instead of replacing it.
    dataset_metadata, layer_metadata
        Passed to `set_metadata` for the dataset and the layer.

    Returns
    -------
    bool :
        Whether a new layer was created, or False if we are appending to an
        existing layer.

    Raises
    ------
    DataSourceError
        If an existing dataset cannot be opened while appending, or a new
        dataset cannot be created.
    ValueError
        If both `encoding` and an "ENCODING" layer option are given for a
        shapefile.
    DataLayerError
        If the layer cannot be created / retrieved or metadata cannot be set.
    """
    cdef const char *path_c = NULL
    cdef const char *layer_c = NULL
    cdef const char *driver_c = NULL
    cdef const char *encoding_c = NULL
    cdef char **dataset_options = NULL
    cdef char **layer_options = NULL
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef OGRSpatialReferenceH ogr_crs = NULL
    cdef OGRwkbGeometryType geometry_code
    cdef int layer_idx = -1

    path_b = path.encode('UTF-8')
    path_c = path_b

    driver_b = driver.encode('UTF-8')
    driver_c = driver_b

    # in-memory dataset is always created from scratch
    path_exists = os.path.exists(path) if not is_vsi else False

    if not layer:
        layer = os.path.splitext(os.path.split(path)[1])[0]

    # if shapefile, GeoJSON, or FlatGeobuf, always delete first
    # for other types, check if we can create layers
    # GPKG might be the only multi-layer writeable type.  TODO: check this
    if driver in ('ESRI Shapefile', 'GeoJSON', 'GeoJSONSeq', 'FlatGeobuf') and path_exists:
        if not append:
            os.unlink(path)
            path_exists = False

    layer_exists = False

    # Everything that can fail after a dataset may have been opened runs inside
    # one try block so the dataset is closed and every GDAL allocation is
    # released no matter which step raises.
    try:
        if path_exists:
            try:
                ogr_dataset = ogr_open(path_c, 1, NULL)

                for i in range(GDALDatasetGetLayerCount(ogr_dataset)):
                    name = OGR_L_GetName(GDALDatasetGetLayer(ogr_dataset, i))
                    if layer == name.decode('UTF-8'):
                        layer_idx = i
                        break

                if layer_idx >= 0:
                    layer_exists = True

                    if not append:
                        GDALDatasetDeleteLayer(ogr_dataset, layer_idx)

            except DataSourceError as exc:
                # open failed
                if append:
                    raise exc

                # otherwise create from scratch
                if is_vsi:
                    VSIUnlink(path_c)
                else:
                    os.unlink(path)

                ogr_dataset = NULL

        # either it didn't exist or could not open it in write mode
        if ogr_dataset == NULL:
            dataset_options = dict_to_options(dataset_kwargs)
            ogr_dataset = ogr_create(path_c, driver_c, dataset_options)

        # if we are not appending to an existing layer, we need to create
        # the layer and all associated properties (CRS, field defs, etc)
        create_layer = not (append and layer_exists)

        ### Create the layer
        if create_layer:
            # Create the CRS
            if crs is not None:
                ogr_crs = create_crs(crs)
                # force geographic CRS to use lon, lat order and ignore axis order specified by CRS, in order
                # to correctly write KML and GeoJSON coordinates in correct order
                OSRSetAxisMappingStrategy(ogr_crs, OAMS_TRADITIONAL_GIS_ORDER)

            # Setup other layer creation options
            for k, v in layer_kwargs.items():
                k = k.encode('UTF-8')
                v = v.encode('UTF-8')
                layer_options = CSLAddNameValue(layer_options, <const char *>k, <const char *>v)

            if driver == 'ESRI Shapefile':
                # ENCODING option must be set for shapefiles to properly write *.cpg
                # file containing the encoding; this is not a supported option for
                # other drivers.  This is done after setting general options above
                # to override ENCODING if passed by the user as a layer option.
                if encoding and "ENCODING" in layer_kwargs:
                    raise ValueError('cannot provide both encoding parameter and "ENCODING" layer creation option; use the encoding parameter')

                # always write to UTF-8 if encoding is not set
                encoding = encoding or "UTF-8"
                encoding_b = encoding.upper().encode('UTF-8')
                encoding_c = encoding_b
                layer_options = CSLSetNameValue(layer_options, "ENCODING", encoding_c)

            ### Get geometry type (only needed when a layer is created)
            # TODO: this is brittle for 3D / ZM / M types
            # TODO: fail on M / ZM types
            geometry_code = get_geometry_type_code(geometry_type)

        try:
            if create_layer:
                layer_b = layer.encode('UTF-8')
                layer_c = layer_b

                ogr_layer = exc_wrap_pointer(
                    GDALDatasetCreateLayer(ogr_dataset, layer_c, ogr_crs,
                                           geometry_code, layer_options))

            else:
                ogr_layer = exc_wrap_pointer(get_ogr_layer(ogr_dataset, layer))

            # Set dataset and layer metadata
            set_metadata(ogr_dataset, dataset_metadata)
            set_metadata(ogr_layer, layer_metadata)

        except Exception as exc:
            raise DataLayerError(str(exc))

    except BaseException:
        # nothing is handed back to the caller on failure, so the dataset has
        # to be closed here or it would stay open
        if ogr_dataset != NULL:
            GDALClose(ogr_dataset)
            ogr_dataset = NULL
        raise

    finally:
        if ogr_crs != NULL:
            OSRRelease(ogr_crs)
            ogr_crs = NULL

        if dataset_options != NULL:
            CSLDestroy(dataset_options)
            dataset_options = NULL

        if layer_options != NULL:
            CSLDestroy(layer_options)
            layer_options = NULL

    ogr_dataset_out[0] = ogr_dataset
    ogr_layer_out[0] = ogr_layer

    return create_layer
2216
-
2217
-
2218
- # TODO: set geometry and field data as memory views?
2219
def ogr_write(
    object path_or_fp,
    str layer,
    str driver,
    geometry,
    fields,
    field_data,
    field_mask,
    str crs,
    str geometry_type,
    str encoding,
    object dataset_kwargs,
    object layer_kwargs,
    bint promote_to_multi=False,
    bint nan_as_null=True,
    bint append=False,
    dataset_metadata=None,
    layer_metadata=None,
    gdal_tz_offsets=None
):
    """Write geometries and attribute values to a layer, feature by feature.

    Parameters
    ----------
    path_or_fp : object
        Destination, resolved to a path by ``get_ogr_vsimem_write_path``.
        When that path starts with ``/vsimem/`` the written data is copied
        back into ``path_or_fp`` with ``read_vsimem_to_buffer`` and the
        in-memory file is deleted afterwards.
    layer, driver, crs, geometry_type, encoding, dataset_kwargs, layer_kwargs,
    append, dataset_metadata, layer_metadata
        Forwarded to ``create_ogr_dataset_layer``. ``geometry_type`` is
        ignored (set to None) when ``geometry`` is None.
    geometry : sequence of WKB bytes (entries may be None), or None
        One entry per record; None means no geometry is written.
    fields : sequence of str, or None
        Field names.
    field_data : sequence of arrays, or None
        One array of values per field; must match ``fields`` in length and
        all arrays must have the same number of records.
    field_mask : sequence, or None
        Per field either None or a mask array; a truthy mask entry writes a
        NULL value for that record.
    promote_to_multi : bool
        Convert Point / LineString / Polygon geometries to their multi
        counterparts before writing.
    nan_as_null : bool
        Write NaN values of real fields as NULL.
    gdal_tz_offsets : dict, optional
        Maps a field name to a per-record array of GDAL timezone flags used
        for datetime fields; 0 is used for fields without an entry.

    Raises
    ------
    ValueError
        On inconsistent input lengths, or when neither geometry nor fields
        are provided.
    FieldError, FeatureError, GeometryError
        When a field, feature or geometry cannot be created or added.
    DataSourceError
        When closing (flushing) the dataset reports an error.
    NotImplementedError
        For field types that cannot be written.
    """
    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef OGRFeatureH ogr_feature = NULL
    cdef OGRGeometryH ogr_geometry = NULL
    cdef OGRFeatureDefnH ogr_featuredef = NULL
    cdef OGRFieldDefnH ogr_fielddef = NULL
    cdef unsigned char *wkb_buffer = NULL
    cdef int supports_transactions = 0
    cdef int err = 0
    cdef int i = 0
    cdef int num_records = -1
    cdef int num_field_data = len(field_data) if field_data is not None else 0
    cdef int num_fields = len(fields) if fields is not None else 0
    cdef bint is_vsi = False

    if num_fields != num_field_data:
        raise ValueError("field_data array needs to be same length as fields array")

    if num_fields == 0 and geometry is None:
        raise ValueError("You must provide at least a geometry column or a field")

    if num_fields > 0:
        num_records = len(field_data[0])
        for i in range(1, num_field_data):
            if len(field_data[i]) != num_records:
                raise ValueError("field_data arrays must be same length")

    if geometry is None:
        # If no geometry data, we ignore the geometry_type and don't create a geometry
        # column
        geometry_type = None
    else:
        if num_fields > 0:
            if len(geometry) != num_records:
                raise ValueError(
                    "field_data arrays must be same length as geometry array"
                )
        else:
            num_records = len(geometry)

    if field_mask is not None:
        # compare against the precomputed count: field_data itself may be None
        if num_field_data != len(field_mask):
            raise ValueError("field_data and field_mask must be same length")
        for i in range(0, len(field_mask)):
            if field_mask[i] is not None and len(field_mask[i]) != num_records:
                raise ValueError("field_mask arrays must be same length as geometry array")
    else:
        field_mask = [None] * num_fields

    if gdal_tz_offsets is None:
        gdal_tz_offsets = {}

    try:
        # Setup in-memory handler if needed
        path = get_ogr_vsimem_write_path(path_or_fp, driver)
        is_vsi = path.startswith('/vsimem/')

        # Setup dataset and layer
        layer_created = create_ogr_dataset_layer(
            path, is_vsi, layer, driver, crs, geometry_type, encoding,
            dataset_kwargs, layer_kwargs, append,
            dataset_metadata, layer_metadata,
            &ogr_dataset, &ogr_layer,
        )

        if driver == 'ESRI Shapefile':
            # force encoding for remaining operations to be in UTF-8 (even if user
            # provides an encoding) because GDAL will automatically convert those to
            # the target encoding because ENCODING is set as a layer creation option
            encoding = "UTF-8"

        else:
            # Now the dataset and layer have been created, we can properly determine the
            # encoding. It is derived from the user, from the dataset capabilities / type,
            # or from the system locale
            encoding = encoding or detect_encoding(ogr_dataset, ogr_layer)

        ### Create the fields
        field_types = None
        if num_fields > 0:
            field_types = infer_field_types([field.dtype for field in field_data])

        if layer_created:
            for i in range(num_fields):
                field_type, field_subtype, width, precision = field_types[i]

                name_b = fields[i].encode(encoding)
                try:
                    ogr_fielddef = exc_wrap_pointer(OGR_Fld_Create(name_b, field_type))

                    # subtypes, see: https://gdal.org/development/rfc/rfc50_ogr_field_subtype.html
                    if field_subtype != OFSTNone:
                        OGR_Fld_SetSubType(ogr_fielddef, field_subtype)

                    if width:
                        OGR_Fld_SetWidth(ogr_fielddef, width)

                    # TODO: set precision

                    exc_wrap_int(OGR_L_CreateField(ogr_layer, ogr_fielddef, 1))

                except Exception:
                    # deliberately not a bare except: KeyboardInterrupt and
                    # SystemExit must not be reported as a field error
                    raise FieldError(f"Error adding field '{fields[i]}' to layer") from None

                finally:
                    if ogr_fielddef != NULL:
                        OGR_Fld_Destroy(ogr_fielddef)
                        ogr_fielddef = NULL

        ### Create the features
        ogr_featuredef = OGR_L_GetLayerDefn(ogr_layer)

        supports_transactions = OGR_L_TestCapability(ogr_layer, OLCTransactions)
        if supports_transactions:
            start_transaction(ogr_dataset, 0)

        for i in range(num_records):
            # create the feature
            ogr_feature = OGR_F_Create(ogr_featuredef)
            if ogr_feature == NULL:
                raise FeatureError(f"Could not create feature at index {i}") from None

            # create the geometry based on specific WKB type (there might be mixed types in geometries)
            # TODO: geometry must not be null or errors
            wkb = None if geometry is None else geometry[i]
            if wkb is not None:
                wkbtype = <int>bytearray(wkb)[1]
                # may need to consider all 4 bytes: int.from_bytes(wkb[0][1:4], byteorder="little")
                # use "little" if the first byte == 1
                ogr_geometry = OGR_G_CreateGeometry(<OGRwkbGeometryType>wkbtype)
                if ogr_geometry == NULL:
                    raise GeometryError(f"Could not create geometry at index {i} for WKB type {wkbtype}") from None

                # import the WKB
                wkb_buffer = wkb
                err = OGR_G_ImportFromWkb(ogr_geometry, wkb_buffer, len(wkb))
                if err:
                    raise GeometryError(f"Could not create geometry from WKB at index {i}") from None

                # Convert to multi type
                if promote_to_multi:
                    if wkbtype in (wkbPoint, wkbPoint25D, wkbPointM, wkbPointZM):
                        ogr_geometry = OGR_G_ForceToMultiPoint(ogr_geometry)
                    elif wkbtype in (wkbLineString, wkbLineString25D, wkbLineStringM, wkbLineStringZM):
                        ogr_geometry = OGR_G_ForceToMultiLineString(ogr_geometry)
                    elif wkbtype in (wkbPolygon, wkbPolygon25D, wkbPolygonM, wkbPolygonZM):
                        ogr_geometry = OGR_G_ForceToMultiPolygon(ogr_geometry)

                # Set the geometry on the feature
                # this assumes ownership of the geometry and it's cleanup
                err = OGR_F_SetGeometryDirectly(ogr_feature, ogr_geometry)
                ogr_geometry = NULL  # to prevent cleanup after this point
                if err:
                    raise GeometryError(f"Could not set geometry for feature at index {i}") from None

            # Set field values
            for field_idx in range(num_fields):
                field_value = field_data[field_idx][i]
                field_type = field_types[field_idx][0]

                mask = field_mask[field_idx]
                if mask is not None and mask[i]:
                    OGR_F_SetFieldNull(ogr_feature, field_idx)

                elif field_type == OFTString:
                    if (
                        field_value is None
                        or (isinstance(field_value, float) and isnan(field_value))
                    ):
                        OGR_F_SetFieldNull(ogr_feature, field_idx)

                    else:
                        if not isinstance(field_value, str):
                            field_value = str(field_value)

                        try:
                            value_b = field_value.encode(encoding)
                            OGR_F_SetFieldString(ogr_feature, field_idx, value_b)

                        except AttributeError:
                            raise ValueError(f"Could not encode value '{field_value}' in field '{fields[field_idx]}' to string")

                elif field_type == OFTInteger:
                    OGR_F_SetFieldInteger(ogr_feature, field_idx, field_value)

                elif field_type == OFTInteger64:
                    OGR_F_SetFieldInteger64(ogr_feature, field_idx, field_value)

                elif field_type == OFTReal:
                    if nan_as_null and isnan(field_value):
                        OGR_F_SetFieldNull(ogr_feature, field_idx)
                    else:
                        OGR_F_SetFieldDouble(ogr_feature, field_idx, field_value)

                elif field_type == OFTDate:
                    if np.isnat(field_value):
                        OGR_F_SetFieldNull(ogr_feature, field_idx)
                    else:
                        datetime = field_value.item()
                        OGR_F_SetFieldDateTimeEx(
                            ogr_feature,
                            field_idx,
                            datetime.year,
                            datetime.month,
                            datetime.day,
                            0,
                            0,
                            0.0,
                            0
                        )

                elif field_type == OFTDateTime:
                    if np.isnat(field_value):
                        OGR_F_SetFieldNull(ogr_feature, field_idx)
                    else:
                        datetime = field_value.astype("datetime64[ms]").item()
                        tz_array = gdal_tz_offsets.get(fields[field_idx], None)
                        if tz_array is None:
                            gdal_tz = 0
                        else:
                            gdal_tz = tz_array[i]
                        OGR_F_SetFieldDateTimeEx(
                            ogr_feature,
                            field_idx,
                            datetime.year,
                            datetime.month,
                            datetime.day,
                            datetime.hour,
                            datetime.minute,
                            datetime.second + datetime.microsecond / 10**6,
                            gdal_tz
                        )

                else:
                    raise NotImplementedError(f"OGR field type is not supported for writing: {field_type}")

            # Add feature to the layer
            try:
                exc_wrap_int(OGR_L_CreateFeature(ogr_layer, ogr_feature))

            except CPLE_BaseError as exc:
                raise FeatureError(f"Could not add feature to layer at index {i}: {exc}") from None

            OGR_F_Destroy(ogr_feature)
            ogr_feature = NULL

        if supports_transactions:
            commit_transaction(ogr_dataset)

        log.info(f"Created {num_records:,} records" )

        # close dataset to force driver to flush data
        exc = ogr_close(ogr_dataset)
        ogr_dataset = NULL
        if exc:
            raise DataSourceError(f"Failed to write features to dataset {path}; {exc}")

        # copy in-memory file back to path_or_fp object
        if is_vsi:
            read_vsimem_to_buffer(path, path_or_fp)

    finally:
        ### Final cleanup
        # make sure that all objects allocated above are released if exceptions
        # are raised, and the dataset is closed
        if ogr_fielddef != NULL:
            OGR_Fld_Destroy(ogr_fielddef)
            ogr_fielddef = NULL

        if ogr_feature != NULL:
            OGR_F_Destroy(ogr_feature)
            ogr_feature = NULL

        if ogr_geometry != NULL:
            OGR_G_DestroyGeometry(ogr_geometry)
            ogr_geometry = NULL

        if ogr_dataset != NULL:
            ogr_close(ogr_dataset)

        if is_vsi:
            delete_vsimem_file(path)
2528
-
2529
-
2530
def ogr_write_arrow(
    object path_or_fp,
    str layer,
    str driver,
    object arrow_obj,
    str crs,
    str geometry_type,
    str geometry_name,
    str encoding,
    object dataset_kwargs,
    object layer_kwargs,
    bint append=False,
    dataset_metadata=None,
    layer_metadata=None,
):
    """Write the batches of an Arrow stream to a layer.

    Parameters
    ----------
    path_or_fp : object
        Destination, resolved to a path by ``get_ogr_vsimem_write_path``.
        When that path starts with ``/vsimem/`` the written data is copied
        back into ``path_or_fp`` with ``read_vsimem_to_buffer`` and the
        in-memory file is deleted afterwards.
    layer, driver, crs, geometry_type, encoding, dataset_kwargs, layer_kwargs,
    append, dataset_metadata, layer_metadata
        Forwarded to ``create_ogr_dataset_layer``.
    arrow_obj : object
        Object exposing ``__arrow_c_stream__()``.
    geometry_name : str
        Name of the geometry column; when set it is passed to GDAL as the
        ``GEOMETRY_NAME`` option.

    Raises
    ------
    ValueError
        If a non-UTF-8 ``encoding`` is requested for a driver other than
        "ESRI Shapefile". This is checked before anything is created.
    RuntimeError
        If GDAL is older than 3.8, or the Arrow stream is invalid or fails.
    DataLayerError
        If GDAL fails to write a batch.
    DataSourceError
        If closing (flushing) the dataset reports an error.
    """
    IF CTE_GDAL_VERSION < (3, 8, 0):
        raise RuntimeError("Need GDAL>=3.8 for Arrow write support")

    cdef OGRDataSourceH ogr_dataset = NULL
    cdef OGRLayerH ogr_layer = NULL
    cdef char **options = NULL
    cdef bint is_vsi = False
    cdef ArrowArrayStream* stream = NULL
    cdef ArrowSchema schema
    cdef ArrowArray array

    schema.release = NULL
    array.release = NULL

    # only shapefile supports non-UTF encoding because ENCODING option is set
    # during dataset creation and GDAL auto-translates from UTF-8 values to that
    # encoding. Validate before touching the file system so that invalid
    # arguments never create, replace or delete a dataset.
    if encoding and encoding.replace('-','').upper() != 'UTF8' and driver != 'ESRI Shapefile':
        raise ValueError("non-UTF-8 encoding is not supported for Arrow; use the non-Arrow interface instead")

    try:
        path = get_ogr_vsimem_write_path(path_or_fp, driver)
        is_vsi = path.startswith('/vsimem/')

        layer_created = create_ogr_dataset_layer(
            path, is_vsi, layer, driver, crs, geometry_type, encoding,
            dataset_kwargs, layer_kwargs, append,
            dataset_metadata, layer_metadata,
            &ogr_dataset, &ogr_layer,
        )

        if geometry_name:
            opts = {"GEOMETRY_NAME": geometry_name}
        else:
            opts = {}

        options = dict_to_options(opts)

        # the capsule is kept in a local so the stream stays valid while used
        stream_capsule = arrow_obj.__arrow_c_stream__()
        stream = <ArrowArrayStream*>PyCapsule_GetPointer(
            stream_capsule, "arrow_array_stream"
        )

        if stream == NULL:
            raise RuntimeError("Could not extract valid Arrow array stream.")

        if stream.release == NULL:
            raise RuntimeError("Arrow array stream was already released.")

        if stream.get_schema(stream, &schema) != 0:
            raise RuntimeError("Could not get Arrow schema from stream.")

        if layer_created:
            create_fields_from_arrow_schema(ogr_layer, &schema, options, geometry_name)

        while True:
            if stream.get_next(stream, &array) != 0:
                raise RuntimeError("Error while accessing batch from stream.")

            # We've reached the end of the stream
            if array.release == NULL:
                break

            if not OGR_L_WriteArrowBatch(ogr_layer, &schema, &array, options):
                exc = exc_check()
                gdal_msg = f": {str(exc)}" if exc else "."
                raise DataLayerError(
                    f"Error while writing batch to OGR layer{gdal_msg}"
                )

            # release the batch unless it was already released during the write
            if array.release != NULL:
                array.release(&array)

        # close dataset to force driver to flush data
        exc = ogr_close(ogr_dataset)
        ogr_dataset = NULL
        if exc:
            raise DataSourceError(f"Failed to write features to dataset {path}; {exc}")

        # copy in-memory file back to path_or_fp object
        if is_vsi:
            read_vsimem_to_buffer(path, path_or_fp)

    finally:
        if stream != NULL and stream.release != NULL:
            stream.release(stream)

        if schema.release != NULL:
            schema.release(&schema)

        if array.release != NULL:
            array.release(&array)

        if options != NULL:
            CSLDestroy(options)
            options = NULL

        if ogr_dataset != NULL:
            ogr_close(ogr_dataset)

        if is_vsi:
            delete_vsimem_file(path)
2647
-
2648
-
2649
cdef get_arrow_extension_metadata(const ArrowSchema* schema):
    """
    Parse the metadata of the ArrowSchema and extract extension type
    metadata (extension name and metadata).

    For the exact layout of the bytes, see
    https://arrow.apache.org/docs/dev/format/CDataInterface.html#c.ArrowSchema.metadata

    Returns
    -------
    tuple (extension_name, extension_metadata)
        each item is a bytes object, or None when the corresponding
        ``ARROW:extension:*`` key is not present (or the schema carries no
        metadata at all).
    """
    cdef const char *metadata = schema.metadata

    extension_name = None
    extension_metadata = None

    if metadata == NULL:
        return extension_name, extension_metadata

    # the number of metadata key/value pairs is stored as an int32 value in
    # the first 4 bytes; it is read as signed, like the lengths below, so a
    # corrupt negative count yields no iterations instead of a huge loop
    n = int.from_bytes(metadata[:4], byteorder=sys.byteorder, signed=True)
    pos = 4

    for i in range(n):
        # for each metadata key/value pair, the first 4 bytes is the byte length
        # of the key as an int32, then follows the key (not null-terminated),
        # and then the same for the value length and bytes
        key_length = int.from_bytes(
            metadata[pos:pos+4], byteorder=sys.byteorder, signed=True
        )
        pos += 4
        key = metadata[pos:pos+key_length]
        pos += key_length
        value_length = int.from_bytes(
            metadata[pos:pos+4], byteorder=sys.byteorder, signed=True
        )
        pos += 4
        value = metadata[pos:pos+value_length]
        pos += value_length

        if key == b"ARROW:extension:name":
            extension_name = value
        elif key == b"ARROW:extension:metadata":
            extension_metadata = value

        # stop scanning as soon as both entries of interest were found
        if extension_name is not None and extension_metadata is not None:
            break

    return extension_name, extension_metadata
2696
-
2697
-
2698
cdef is_arrow_geometry_field(const ArrowSchema* schema):
    """Tell whether an Arrow field is a WKB geometry column.

    Returns True when the field's Arrow extension name is ``geoarrow.wkb``
    or ``ogc.wkb`` and False otherwise, except that any other ``geoarrow.*``
    extension raises NotImplementedError.
    """
    extension_name, _ = get_arrow_extension_metadata(schema)

    # fields without an extension name are ordinary attribute columns
    if extension_name is None:
        return False

    if extension_name in (b"geoarrow.wkb", b"ogc.wkb"):
        return True

    # raise an error for other geoarrow types
    if extension_name.startswith(b"geoarrow."):
        raise NotImplementedError(
            f"Writing a geometry column of type {extension_name.decode()} is not yet "
            "supported. Only WKB is currently supported ('geoarrow.wkb' or "
            "'ogc.wkb' types)."
        )

    return False
2713
-
2714
-
2715
cdef create_fields_from_arrow_schema(
    OGRLayerH destLayer, const ArrowSchema* schema, char** options, str geometry_name
):
    """Create one output field per non-geometry child of an Arrow schema.

    Each child of ``schema`` is passed to ``OGR_L_CreateFieldFromArrowSchema``
    unless its name equals ``geometry_name`` or it is recognised as a
    geometry field by ``is_arrow_geometry_field``.

    Raises RuntimeError for GDAL < 3.8 or a NULL child, and FieldError when
    GDAL fails to create a field.
    """

    IF CTE_GDAL_VERSION < (3, 8, 0):
        raise RuntimeError("Need GDAL>=3.8 for Arrow write support")

    # The schema object is a struct type where each child is a column.
    cdef ArrowSchema* child
    for i in range(schema.n_children):
        child = schema.children[i]

        if child == NULL:
            raise RuntimeError("Received invalid Arrow schema (null child)")

        # Don't create property for geometry column
        if get_string(child.name) == geometry_name or is_arrow_geometry_field(child):
            continue

        # successful creation: move on to the next column
        if OGR_L_CreateFieldFromArrowSchema(destLayer, child, options):
            continue

        exc = exc_check()
        gdal_msg = f" ({str(exc)})" if exc else ""
        raise FieldError(
            f"Error while creating field from Arrow for field {i} with name "
            f"'{get_string(child.name)}' and type {get_string(child.format)}"
            f"{gdal_msg}."
        )