pyogrio 0.7.2__cp38-cp38-win_amd64.whl → 0.9.0__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyogrio might be problematic. Click here for more details.
- pyogrio/__init__.py +12 -7
- pyogrio/_compat.py +6 -1
- pyogrio/_err.c +855 -321
- pyogrio/_err.cp38-win_amd64.pyd +0 -0
- pyogrio/_err.pyx +7 -3
- pyogrio/_geometry.c +134 -75
- pyogrio/_geometry.cp38-win_amd64.pyd +0 -0
- pyogrio/_io.c +28947 -23126
- pyogrio/_io.cp38-win_amd64.pyd +0 -0
- pyogrio/_io.pyx +904 -242
- pyogrio/_ogr.c +1317 -1640
- pyogrio/_ogr.cp38-win_amd64.pyd +0 -0
- pyogrio/_ogr.pxd +69 -13
- pyogrio/_ogr.pyx +8 -24
- pyogrio/_version.py +3 -3
- pyogrio/_vsi.c +6815 -0
- pyogrio/_vsi.cp38-win_amd64.pyd +0 -0
- pyogrio/_vsi.pxd +4 -0
- pyogrio/_vsi.pyx +140 -0
- pyogrio/core.py +43 -44
- pyogrio/gdal_data/GDAL-targets-release.cmake +1 -1
- pyogrio/gdal_data/GDAL-targets.cmake +10 -6
- pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
- pyogrio/gdal_data/gdalinfo_output.schema.json +2 -0
- pyogrio/gdal_data/gdalvrt.xsd +163 -0
- pyogrio/gdal_data/ogrinfo_output.schema.json +12 -1
- pyogrio/gdal_data/vcpkg.spdx.json +23 -23
- pyogrio/gdal_data/vcpkg_abi_info.txt +29 -28
- pyogrio/geopandas.py +140 -34
- pyogrio/proj_data/ITRF2008 +2 -2
- pyogrio/proj_data/proj-config-version.cmake +2 -2
- pyogrio/proj_data/proj-config.cmake +2 -1
- pyogrio/proj_data/proj-targets-release.cmake +0 -1
- pyogrio/proj_data/proj-targets.cmake +10 -6
- pyogrio/proj_data/proj.db +0 -0
- pyogrio/proj_data/proj4-targets-release.cmake +0 -1
- pyogrio/proj_data/proj4-targets.cmake +10 -6
- pyogrio/proj_data/vcpkg.spdx.json +21 -43
- pyogrio/proj_data/vcpkg_abi_info.txt +16 -17
- pyogrio/raw.py +438 -116
- pyogrio/tests/conftest.py +75 -6
- pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
- pyogrio/tests/test_arrow.py +841 -7
- pyogrio/tests/test_core.py +99 -7
- pyogrio/tests/test_geopandas_io.py +827 -121
- pyogrio/tests/test_path.py +23 -3
- pyogrio/tests/test_raw_io.py +276 -50
- pyogrio/util.py +39 -19
- pyogrio-0.9.0.dist-info/DELVEWHEEL +2 -0
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/METADATA +2 -2
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/RECORD +74 -69
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/WHEEL +1 -1
- pyogrio.libs/.load-order-pyogrio-0.9.0 +19 -0
- pyogrio.libs/Lerc-5e4d8cbeeabca06f95e2270792304dc3.dll +0 -0
- pyogrio.libs/{gdal-c3b1d8f66682071d0cd26d86e4182013.dll → gdal-b434963605a006e01c486c0df6dea4e0.dll} +0 -0
- pyogrio.libs/geos-f0622d0794b81c937a851b2e6fa9b712.dll +0 -0
- pyogrio.libs/geos_c-0e16bf70612fc3301d077b9d863a3fdb.dll +0 -0
- pyogrio.libs/{geotiff-e43cdab688866b59f8800cfcde836d16.dll → geotiff-772e7c705fb15ddf91b432adb4eb1f6c.dll} +0 -0
- pyogrio.libs/iconv-2-8fcc23ddc6f096c45871011b6e008b44.dll +0 -0
- pyogrio.libs/{jpeg62-567ab743ac805dfb57fe3867ba5788a4.dll → jpeg62-2f9b7af22d78338e8f0be0058503dc35.dll} +0 -0
- pyogrio.libs/json-c-e52a077545e4057de42beb4948289b41.dll +0 -0
- pyogrio.libs/libcurl-bc81cd8afe15b10c0821b181b6af8bd0.dll +0 -0
- pyogrio.libs/libexpat-fbe03ca8917dfda776562d4338b289b8.dll +0 -0
- pyogrio.libs/{liblzma-de7f4770d4e3715acd031ca93883f10c.dll → liblzma-6b36f24d54d3dd45f274a2aebef81085.dll} +0 -0
- pyogrio.libs/libpng16-13928571ad910705eae8d7dd8eef8b11.dll +0 -0
- pyogrio.libs/{msvcp140-83b6a1a2fa8b1735a358b2fe13cabe4e.dll → msvcp140-46db46e967c8db2cb7a20fc75872a57e.dll} +0 -0
- pyogrio.libs/proj-8a30239ef2dfc3b9dd2bb48e8abb330f.dll +0 -0
- pyogrio.libs/{qhull_r-99ae8a526357acc44b162cb4df2c3bb6.dll → qhull_r-c45abde5d0c92faf723cc2942138af77.dll} +0 -0
- pyogrio.libs/sqlite3-df30c3cf230727e23c43c40126a530f7.dll +0 -0
- pyogrio.libs/{tiff-7c2d4b204ec2db46c81f6a597895c2f7.dll → tiff-43630f30487a9015213475ae86ed3fa3.dll} +0 -0
- pyogrio.libs/vcruntime140_1-8f7e93381c4b3d1e411993c0bae01646.dll +0 -0
- pyogrio.libs/{zlib1-824de9299616f0908aeeb9441a084848.dll → zlib1-e1272810861a13dd8d6cff3beac47f17.dll} +0 -0
- pyogrio/tests/win32.py +0 -86
- pyogrio-0.7.2.dist-info/DELVEWHEEL +0 -2
- pyogrio.libs/.load-order-pyogrio-0.7.2 +0 -18
- pyogrio.libs/Lerc-d5afc4101deffe7de21241ccd4d562f6.dll +0 -0
- pyogrio.libs/geos-1c764a1384537a0ad2995e83d23e8642.dll +0 -0
- pyogrio.libs/geos_c-0d7dfdcee49efa8df585e2fb993157aa.dll +0 -0
- pyogrio.libs/json-c-36c91e30c4410d41c22b2010c31183e3.dll +0 -0
- pyogrio.libs/libcurl-ebcc8c18195071a90e59f818902e10c6.dll +0 -0
- pyogrio.libs/libexpat-345379c9c11632130d8c383cbacde1a6.dll +0 -0
- pyogrio.libs/libpng16-2c30e6846653c47ef2ff9d7dec3338ba.dll +0 -0
- pyogrio.libs/proj-98758c96a6cb682b5cec7e8dc5e29a50.dll +0 -0
- pyogrio.libs/sqlite3-327ed7b38bfd91fb4a17544960e055e9.dll +0 -0
- pyogrio.libs/vcruntime140_1-d1a1506707e0c0a26950a60c5f97ad99.dll +0 -0
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/LICENSE +0 -0
- {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/top_level.txt +0 -0
pyogrio/raw.py
CHANGED
|
@@ -1,24 +1,25 @@
|
|
|
1
|
+
from io import BytesIO
|
|
1
2
|
import warnings
|
|
2
3
|
|
|
3
4
|
from pyogrio._env import GDALEnv
|
|
4
|
-
from pyogrio._compat import HAS_ARROW_API
|
|
5
|
+
from pyogrio._compat import HAS_ARROW_API, HAS_ARROW_WRITE_API, HAS_PYARROW
|
|
5
6
|
from pyogrio.core import detect_write_driver
|
|
6
7
|
from pyogrio.errors import DataSourceError
|
|
7
8
|
from pyogrio.util import (
|
|
8
|
-
get_vsi_path,
|
|
9
|
-
vsi_path,
|
|
10
|
-
_preprocess_options_key_value,
|
|
11
9
|
_mask_to_wkb,
|
|
10
|
+
_preprocess_options_key_value,
|
|
11
|
+
get_vsi_path_or_buffer,
|
|
12
|
+
vsi_path,
|
|
12
13
|
)
|
|
13
14
|
|
|
14
15
|
with GDALEnv():
|
|
15
|
-
from pyogrio._io import ogr_open_arrow, ogr_read, ogr_write
|
|
16
|
+
from pyogrio._io import ogr_open_arrow, ogr_read, ogr_write, ogr_write_arrow
|
|
16
17
|
from pyogrio._ogr import (
|
|
18
|
+
_get_driver_metadata_item,
|
|
17
19
|
get_gdal_version,
|
|
18
20
|
get_gdal_version_string,
|
|
19
21
|
ogr_driver_supports_write,
|
|
20
|
-
|
|
21
|
-
_get_driver_metadata_item,
|
|
22
|
+
ogr_driver_supports_vsi,
|
|
22
23
|
)
|
|
23
24
|
|
|
24
25
|
|
|
@@ -67,13 +68,16 @@ def read(
|
|
|
67
68
|
of the layer in the data source. Defaults to first layer in data source.
|
|
68
69
|
encoding : str, optional (default: None)
|
|
69
70
|
If present, will be used as the encoding for reading string values from
|
|
70
|
-
the data source
|
|
71
|
-
|
|
71
|
+
the data source. By default will automatically try to detect the native
|
|
72
|
+
encoding and decode to ``UTF-8``.
|
|
72
73
|
columns : list-like, optional (default: all columns)
|
|
73
74
|
List of column names to import from the data source. Column names must
|
|
74
75
|
exactly match the names in the data source, and will be returned in
|
|
75
76
|
the order they occur in the data source. To avoid reading any columns,
|
|
76
|
-
pass an empty list-like.
|
|
77
|
+
pass an empty list-like. If combined with ``where`` parameter, must
|
|
78
|
+
include columns referenced in the ``where`` expression or the data may
|
|
79
|
+
not be correctly read; the data source may return empty results or
|
|
80
|
+
raise an exception (behavior varies by driver).
|
|
77
81
|
read_geometry : bool, optional (default: True)
|
|
78
82
|
If True, will read geometry into WKB. If False, geometry will be None.
|
|
79
83
|
force_2d : bool, optional (default: False)
|
|
@@ -186,35 +190,28 @@ def read(
|
|
|
186
190
|
https://www.gaia-gis.it/gaia-sins/spatialite-sql-latest.html
|
|
187
191
|
|
|
188
192
|
"""
|
|
189
|
-
path, buffer = get_vsi_path(path_or_buffer)
|
|
190
193
|
|
|
191
194
|
dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}
|
|
192
195
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
)
|
|
213
|
-
finally:
|
|
214
|
-
if buffer is not None:
|
|
215
|
-
remove_virtual_file(path)
|
|
216
|
-
|
|
217
|
-
return result
|
|
196
|
+
return ogr_read(
|
|
197
|
+
get_vsi_path_or_buffer(path_or_buffer),
|
|
198
|
+
layer=layer,
|
|
199
|
+
encoding=encoding,
|
|
200
|
+
columns=columns,
|
|
201
|
+
read_geometry=read_geometry,
|
|
202
|
+
force_2d=force_2d,
|
|
203
|
+
skip_features=skip_features,
|
|
204
|
+
max_features=max_features or 0,
|
|
205
|
+
where=where,
|
|
206
|
+
bbox=bbox,
|
|
207
|
+
mask=_mask_to_wkb(mask),
|
|
208
|
+
fids=fids,
|
|
209
|
+
sql=sql,
|
|
210
|
+
sql_dialect=sql_dialect,
|
|
211
|
+
return_fids=return_fids,
|
|
212
|
+
dataset_kwargs=dataset_kwargs,
|
|
213
|
+
datetime_as_string=datetime_as_string,
|
|
214
|
+
)
|
|
218
215
|
|
|
219
216
|
|
|
220
217
|
def read_arrow(
|
|
@@ -256,8 +253,16 @@ def read_arrow(
|
|
|
256
253
|
"geometry_name": "<name of geometry column in arrow table>",
|
|
257
254
|
}
|
|
258
255
|
"""
|
|
256
|
+
if not HAS_PYARROW:
|
|
257
|
+
raise RuntimeError(
|
|
258
|
+
"pyarrow required to read using 'read_arrow'. You can use 'open_arrow' "
|
|
259
|
+
"to read data with an alternative Arrow implementation"
|
|
260
|
+
)
|
|
261
|
+
|
|
259
262
|
from pyarrow import Table
|
|
260
263
|
|
|
264
|
+
gdal_version = get_gdal_version()
|
|
265
|
+
|
|
261
266
|
if skip_features < 0:
|
|
262
267
|
raise ValueError("'skip_features' must be >= 0")
|
|
263
268
|
|
|
@@ -275,7 +280,7 @@ def read_arrow(
|
|
|
275
280
|
|
|
276
281
|
# handle skip_features internally within open_arrow if GDAL >= 3.8.0
|
|
277
282
|
gdal_skip_features = 0
|
|
278
|
-
if
|
|
283
|
+
if gdal_version >= (3, 8, 0):
|
|
279
284
|
gdal_skip_features = skip_features
|
|
280
285
|
skip_features = 0
|
|
281
286
|
|
|
@@ -295,6 +300,7 @@ def read_arrow(
|
|
|
295
300
|
return_fids=return_fids,
|
|
296
301
|
skip_features=gdal_skip_features,
|
|
297
302
|
batch_size=batch_size,
|
|
303
|
+
use_pyarrow=True,
|
|
298
304
|
**kwargs,
|
|
299
305
|
) as source:
|
|
300
306
|
meta, reader = source
|
|
@@ -349,17 +355,37 @@ def open_arrow(
|
|
|
349
355
|
sql_dialect=None,
|
|
350
356
|
return_fids=False,
|
|
351
357
|
batch_size=65_536,
|
|
358
|
+
use_pyarrow=False,
|
|
352
359
|
**kwargs,
|
|
353
360
|
):
|
|
354
361
|
"""
|
|
355
|
-
Open OGR data source as a stream of
|
|
362
|
+
Open OGR data source as a stream of Arrow record batches.
|
|
356
363
|
|
|
357
364
|
See docstring of `read` for parameters.
|
|
358
365
|
|
|
359
|
-
The
|
|
366
|
+
The returned object is reading from a stream provided by OGR and must not be
|
|
360
367
|
accessed after the OGR dataset has been closed, i.e. after the context manager has
|
|
361
368
|
been closed.
|
|
362
369
|
|
|
370
|
+
By default this function returns a generic stream object implementing
|
|
371
|
+
the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_stream__``
|
|
372
|
+
method). This object can then be consumed by your Arrow implementation
|
|
373
|
+
of choice that supports this protocol.
|
|
374
|
+
Optionally, you can specify ``use_pyarrow=True`` to directly get the
|
|
375
|
+
stream as a `pyarrow.RecordBatchReader`.
|
|
376
|
+
|
|
377
|
+
.. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
|
|
378
|
+
|
|
379
|
+
Other Parameters
|
|
380
|
+
----------------
|
|
381
|
+
batch_size : int (default: 65_536)
|
|
382
|
+
Maximum number of features to retrieve in a batch.
|
|
383
|
+
use_pyarrow : bool (default: False)
|
|
384
|
+
If True, return a pyarrow RecordBatchReader instead of a generic
|
|
385
|
+
ArrowStream object. In the default case, this stream object needs
|
|
386
|
+
to be passed to another library supporting the Arrow PyCapsule
|
|
387
|
+
Protocol to consume the stream of data.
|
|
388
|
+
|
|
363
389
|
Examples
|
|
364
390
|
--------
|
|
365
391
|
|
|
@@ -368,16 +394,29 @@ def open_arrow(
|
|
|
368
394
|
>>> import shapely
|
|
369
395
|
>>>
|
|
370
396
|
>>> with open_arrow(path) as source:
|
|
397
|
+
>>> meta, stream = source
|
|
398
|
+
>>> # wrap the arrow stream object in a pyarrow RecordBatchReader
|
|
399
|
+
>>> reader = pa.RecordBatchReader.from_stream(stream)
|
|
400
|
+
>>> for batch in reader:
|
|
401
|
+
>>> geometries = shapely.from_wkb(batch[meta["geometry_name"] or "wkb_geometry"])
|
|
402
|
+
|
|
403
|
+
The returned `stream` object needs to be consumed by a library implementing
|
|
404
|
+
the Arrow PyCapsule Protocol. In the above example, pyarrow is used through
|
|
405
|
+
its RecordBatchReader. For this case, you can also specify ``use_pyarrow=True``
|
|
406
|
+
to directly get this result as a short-cut:
|
|
407
|
+
|
|
408
|
+
>>> with open_arrow(path, use_pyarrow=True) as source:
|
|
371
409
|
>>> meta, reader = source
|
|
372
|
-
>>> for
|
|
373
|
-
>>> geometries = shapely.from_wkb(
|
|
410
|
+
>>> for batch in reader:
|
|
411
|
+
>>> geometries = shapely.from_wkb(batch[meta["geometry_name"] or "wkb_geometry"])
|
|
374
412
|
|
|
375
413
|
Returns
|
|
376
414
|
-------
|
|
377
|
-
(dict, pyarrow.
|
|
415
|
+
(dict, pyarrow.RecordBatchReader or ArrowStream)
|
|
378
416
|
|
|
379
417
|
Returns a tuple of meta information about the data source in a dict,
|
|
380
|
-
and a
|
|
418
|
+
and a data stream object (a generic ArrowStream object, or a pyarrow
|
|
419
|
+
RecordBatchReader if `use_pyarrow` is set to True).
|
|
381
420
|
|
|
382
421
|
Meta is: {
|
|
383
422
|
"crs": "<crs>",
|
|
@@ -388,35 +427,30 @@ def open_arrow(
|
|
|
388
427
|
}
|
|
389
428
|
"""
|
|
390
429
|
if not HAS_ARROW_API:
|
|
391
|
-
raise RuntimeError("
|
|
392
|
-
|
|
393
|
-
path, buffer = get_vsi_path(path_or_buffer)
|
|
430
|
+
raise RuntimeError("GDAL>= 3.6 required to read using arrow")
|
|
394
431
|
|
|
395
432
|
dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}
|
|
396
433
|
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
finally:
|
|
418
|
-
if buffer is not None:
|
|
419
|
-
remove_virtual_file(path)
|
|
434
|
+
return ogr_open_arrow(
|
|
435
|
+
get_vsi_path_or_buffer(path_or_buffer),
|
|
436
|
+
layer=layer,
|
|
437
|
+
encoding=encoding,
|
|
438
|
+
columns=columns,
|
|
439
|
+
read_geometry=read_geometry,
|
|
440
|
+
force_2d=force_2d,
|
|
441
|
+
skip_features=skip_features,
|
|
442
|
+
max_features=max_features or 0,
|
|
443
|
+
where=where,
|
|
444
|
+
bbox=bbox,
|
|
445
|
+
mask=_mask_to_wkb(mask),
|
|
446
|
+
fids=fids,
|
|
447
|
+
sql=sql,
|
|
448
|
+
sql_dialect=sql_dialect,
|
|
449
|
+
return_fids=return_fids,
|
|
450
|
+
dataset_kwargs=dataset_kwargs,
|
|
451
|
+
batch_size=batch_size,
|
|
452
|
+
use_pyarrow=use_pyarrow,
|
|
453
|
+
)
|
|
420
454
|
|
|
421
455
|
|
|
422
456
|
def _parse_options_names(xml):
|
|
@@ -436,6 +470,112 @@ def _parse_options_names(xml):
|
|
|
436
470
|
return options
|
|
437
471
|
|
|
438
472
|
|
|
473
|
+
def _validate_metadata(dataset_metadata, layer_metadata, metadata):
|
|
474
|
+
""" """
|
|
475
|
+
if metadata is not None:
|
|
476
|
+
if layer_metadata is not None:
|
|
477
|
+
raise ValueError("Cannot pass both metadata and layer_metadata")
|
|
478
|
+
layer_metadata = metadata
|
|
479
|
+
|
|
480
|
+
# validate metadata types
|
|
481
|
+
for metadata in [dataset_metadata, layer_metadata]:
|
|
482
|
+
if metadata is not None:
|
|
483
|
+
for k, v in metadata.items():
|
|
484
|
+
if not isinstance(k, str):
|
|
485
|
+
raise ValueError(f"metadata key {k} must be a string")
|
|
486
|
+
|
|
487
|
+
if not isinstance(v, str):
|
|
488
|
+
raise ValueError(f"metadata value {v} must be a string")
|
|
489
|
+
return dataset_metadata, layer_metadata
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def _preprocess_options_kwargs(driver, dataset_options, layer_options, kwargs):
|
|
493
|
+
"""
|
|
494
|
+
Preprocess kwargs and split in dataset and layer creation options.
|
|
495
|
+
"""
|
|
496
|
+
|
|
497
|
+
dataset_kwargs = _preprocess_options_key_value(dataset_options or {})
|
|
498
|
+
layer_kwargs = _preprocess_options_key_value(layer_options or {})
|
|
499
|
+
if kwargs:
|
|
500
|
+
kwargs = _preprocess_options_key_value(kwargs)
|
|
501
|
+
dataset_option_names = _parse_options_names(
|
|
502
|
+
_get_driver_metadata_item(driver, "DMD_CREATIONOPTIONLIST")
|
|
503
|
+
)
|
|
504
|
+
layer_option_names = _parse_options_names(
|
|
505
|
+
_get_driver_metadata_item(driver, "DS_LAYER_CREATIONOPTIONLIST")
|
|
506
|
+
)
|
|
507
|
+
for k, v in kwargs.items():
|
|
508
|
+
if k in dataset_option_names:
|
|
509
|
+
dataset_kwargs[k] = v
|
|
510
|
+
elif k in layer_option_names:
|
|
511
|
+
layer_kwargs[k] = v
|
|
512
|
+
else:
|
|
513
|
+
raise ValueError(f"unrecognized option '{k}' for driver '{driver}'")
|
|
514
|
+
|
|
515
|
+
return dataset_kwargs, layer_kwargs
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def _get_write_path_driver(path, driver, append=False):
|
|
519
|
+
"""Validate and return path and driver
|
|
520
|
+
|
|
521
|
+
Parameters
|
|
522
|
+
----------
|
|
523
|
+
path : str or io.BytesIO
|
|
524
|
+
path to output file on writeable file system or an io.BytesIO object to
|
|
525
|
+
allow writing to memory
|
|
526
|
+
driver : str, optional (default: None)
|
|
527
|
+
The OGR format driver used to write the vector file. By default attempts
|
|
528
|
+
to infer driver from path. Must be provided to write to a file-like
|
|
529
|
+
object.
|
|
530
|
+
append : bool, optional (default: False)
|
|
531
|
+
True if path and driver are being tested for append support
|
|
532
|
+
|
|
533
|
+
Returns
|
|
534
|
+
-------
|
|
535
|
+
(path, driver)
|
|
536
|
+
"""
|
|
537
|
+
|
|
538
|
+
if isinstance(path, BytesIO):
|
|
539
|
+
if driver is None:
|
|
540
|
+
raise ValueError("driver must be provided to write to in-memory file")
|
|
541
|
+
|
|
542
|
+
# blacklist certain drivers known not to work in current memory implementation
|
|
543
|
+
# because they create multiple files
|
|
544
|
+
if driver in {"ESRI Shapefile", "OpenFileGDB"}:
|
|
545
|
+
raise ValueError(f"writing to in-memory file is not supported for {driver}")
|
|
546
|
+
|
|
547
|
+
# verify that driver supports VSI methods
|
|
548
|
+
if not ogr_driver_supports_vsi(driver):
|
|
549
|
+
raise DataSourceError(
|
|
550
|
+
f"{driver} does not support ability to write in-memory in GDAL "
|
|
551
|
+
f"{get_gdal_version_string()}"
|
|
552
|
+
)
|
|
553
|
+
|
|
554
|
+
if append:
|
|
555
|
+
raise NotImplementedError("append is not supported for in-memory files")
|
|
556
|
+
|
|
557
|
+
else:
|
|
558
|
+
path = vsi_path(str(path))
|
|
559
|
+
|
|
560
|
+
if driver is None:
|
|
561
|
+
driver = detect_write_driver(path)
|
|
562
|
+
|
|
563
|
+
# verify that driver supports writing
|
|
564
|
+
if not ogr_driver_supports_write(driver):
|
|
565
|
+
raise DataSourceError(
|
|
566
|
+
f"{driver} does not support write functionality in GDAL "
|
|
567
|
+
f"{get_gdal_version_string()}"
|
|
568
|
+
)
|
|
569
|
+
|
|
570
|
+
# prevent segfault from: https://github.com/OSGeo/gdal/issues/5739
|
|
571
|
+
if append and driver == "FlatGeobuf" and get_gdal_version() <= (3, 5, 0):
|
|
572
|
+
raise RuntimeError(
|
|
573
|
+
"append to FlatGeobuf is not supported for GDAL <= 3.5.0 due to segfault"
|
|
574
|
+
)
|
|
575
|
+
|
|
576
|
+
return path, driver
|
|
577
|
+
|
|
578
|
+
|
|
439
579
|
def write(
|
|
440
580
|
path,
|
|
441
581
|
geometry,
|
|
@@ -459,41 +599,94 @@ def write(
|
|
|
459
599
|
gdal_tz_offsets=None,
|
|
460
600
|
**kwargs,
|
|
461
601
|
):
|
|
602
|
+
"""Write geometry and field data to an OGR file format.
|
|
603
|
+
|
|
604
|
+
Parameters
|
|
605
|
+
----------
|
|
606
|
+
path : str or io.BytesIO
|
|
607
|
+
path to output file on writeable file system or an io.BytesIO object to
|
|
608
|
+
allow writing to memory
|
|
609
|
+
NOTE: support for writing to memory is limited to specific drivers.
|
|
610
|
+
geometry : ndarray of WKB encoded geometries or None
|
|
611
|
+
If None, geometries will not be written to output file
|
|
612
|
+
field_data : list-like of shape (num_fields, num_records)
|
|
613
|
+
contains one record per field to be written in same order as fields
|
|
614
|
+
fields : list-like
|
|
615
|
+
contains field names
|
|
616
|
+
field_mask : list-like of ndarrays or None, optional (default: None)
|
|
617
|
+
contains mask arrays indicating null values of the field at the same
|
|
618
|
+
position in the outer list, or None to indicate field does not have
|
|
619
|
+
a mask array
|
|
620
|
+
layer : str, optional (default: None)
|
|
621
|
+
layer name to create. If writing to memory and layer name is not
|
|
622
|
+
provided, the layer name will be set to a UUID4 value.
|
|
623
|
+
driver : string, optional (default: None)
|
|
624
|
+
The OGR format driver used to write the vector file. By default attempts
|
|
625
|
+
to infer driver from path. Must be provided to write to memory.
|
|
626
|
+
geometry_type : str, optional (default: None)
|
|
627
|
+
Possible values are: "Unknown", "Point", "LineString", "Polygon",
|
|
628
|
+
"MultiPoint", "MultiLineString", "MultiPolygon" or "GeometryCollection".
|
|
629
|
+
|
|
630
|
+
This parameter does not modify the geometry, but it will try to force
|
|
631
|
+
the layer type of the output file to this value. Use this parameter with
|
|
632
|
+
caution because using a wrong layer geometry type may result in errors
|
|
633
|
+
when writing the file, may be ignored by the driver, or may result in
|
|
634
|
+
invalid files.
|
|
635
|
+
crs : str, optional (default: None)
|
|
636
|
+
WKT-encoded CRS of the geometries to be written.
|
|
637
|
+
encoding : str, optional (default: None)
|
|
638
|
+
If present, will be used as the encoding for writing string values to
|
|
639
|
+
the file. Use with caution, only certain drivers support encodings
|
|
640
|
+
other than UTF-8.
|
|
641
|
+
promote_to_multi : bool, optional (default: None)
|
|
642
|
+
If True, will convert singular geometry types in the data to their
|
|
643
|
+
corresponding multi geometry type for writing. By default, will convert
|
|
644
|
+
mixed singular and multi geometry types to multi geometry types for
|
|
645
|
+
drivers that do not support mixed singular and multi geometry types. If
|
|
646
|
+
False, geometry types will not be promoted, which may result in errors
|
|
647
|
+
or invalid files when attempting to write mixed singular and multi
|
|
648
|
+
geometry types to drivers that do not support such combinations.
|
|
649
|
+
nan_as_null : bool, default True
|
|
650
|
+
For floating point columns (float32 / float64), whether NaN values are
|
|
651
|
+
written as "null" (missing value). Defaults to True because in pandas
|
|
652
|
+
NaNs are typically used as missing value. Note that when set to False,
|
|
653
|
+
behaviour is format specific: some formats don't support NaNs by
|
|
654
|
+
default (e.g. GeoJSON will skip this property) or might treat them as
|
|
655
|
+
null anyway (e.g. GeoPackage).
|
|
656
|
+
append : bool, optional (default: False)
|
|
657
|
+
If True, the data source specified by path already exists, and the
|
|
658
|
+
driver supports appending to an existing data source, will cause the
|
|
659
|
+
data to be appended to the existing records in the data source. Not
|
|
660
|
+
supported for writing to in-memory files.
|
|
661
|
+
NOTE: append support is limited to specific drivers and GDAL versions.
|
|
662
|
+
dataset_metadata : dict, optional (default: None)
|
|
663
|
+
Metadata to be stored at the dataset level in the output file; limited
|
|
664
|
+
to drivers that support writing metadata, such as GPKG, and silently
|
|
665
|
+
ignored otherwise. Keys and values must be strings.
|
|
666
|
+
layer_metadata : dict, optional (default: None)
|
|
667
|
+
Metadata to be stored at the layer level in the output file; limited to
|
|
668
|
+
drivers that support writing metadata, such as GPKG, and silently
|
|
669
|
+
ignored otherwise. Keys and values must be strings.
|
|
670
|
+
metadata : dict, optional (default: None)
|
|
671
|
+
alias of layer_metadata
|
|
672
|
+
dataset_options : dict, optional
|
|
673
|
+
Dataset creation options (format specific) passed to OGR. Specify as
|
|
674
|
+
a key-value dictionary.
|
|
675
|
+
layer_options : dict, optional
|
|
676
|
+
Layer creation options (format specific) passed to OGR. Specify as
|
|
677
|
+
a key-value dictionary.
|
|
678
|
+
gdal_tz_offsets : dict, optional (default: None)
|
|
679
|
+
Used to handle GDAL timezone offsets for each field contained in dict.
|
|
680
|
+
"""
|
|
462
681
|
# if dtypes is given, remove it from kwargs (dtypes is included in meta returned by
|
|
463
682
|
# read, and it is convenient to pass meta directly into write for round trip tests)
|
|
464
683
|
kwargs.pop("dtypes", None)
|
|
465
|
-
path = vsi_path(str(path))
|
|
466
684
|
|
|
467
|
-
|
|
468
|
-
driver = detect_write_driver(path)
|
|
685
|
+
path, driver = _get_write_path_driver(path, driver, append=append)
|
|
469
686
|
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
f"{driver} does not support write functionality in GDAL "
|
|
474
|
-
f"{get_gdal_version_string()}"
|
|
475
|
-
)
|
|
476
|
-
|
|
477
|
-
# prevent segfault from: https://github.com/OSGeo/gdal/issues/5739
|
|
478
|
-
if append and driver == "FlatGeobuf" and get_gdal_version() <= (3, 5, 0):
|
|
479
|
-
raise RuntimeError(
|
|
480
|
-
"append to FlatGeobuf is not supported for GDAL <= 3.5.0 due to segfault"
|
|
481
|
-
)
|
|
482
|
-
|
|
483
|
-
if metadata is not None:
|
|
484
|
-
if layer_metadata is not None:
|
|
485
|
-
raise ValueError("Cannot pass both metadata and layer_metadata")
|
|
486
|
-
layer_metadata = metadata
|
|
487
|
-
|
|
488
|
-
# validate metadata types
|
|
489
|
-
for metadata in [dataset_metadata, layer_metadata]:
|
|
490
|
-
if metadata is not None:
|
|
491
|
-
for k, v in metadata.items():
|
|
492
|
-
if not isinstance(k, str):
|
|
493
|
-
raise ValueError(f"metadata key {k} must be a string")
|
|
494
|
-
|
|
495
|
-
if not isinstance(v, str):
|
|
496
|
-
raise ValueError(f"metadata value {v} must be a string")
|
|
687
|
+
dataset_metadata, layer_metadata = _validate_metadata(
|
|
688
|
+
dataset_metadata, layer_metadata, metadata
|
|
689
|
+
)
|
|
497
690
|
|
|
498
691
|
if geometry is not None and promote_to_multi is None:
|
|
499
692
|
promote_to_multi = (
|
|
@@ -509,23 +702,9 @@ def write(
|
|
|
509
702
|
)
|
|
510
703
|
|
|
511
704
|
# preprocess kwargs and split in dataset and layer creation options
|
|
512
|
-
dataset_kwargs =
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
kwargs = _preprocess_options_key_value(kwargs)
|
|
516
|
-
dataset_option_names = _parse_options_names(
|
|
517
|
-
_get_driver_metadata_item(driver, "DMD_CREATIONOPTIONLIST")
|
|
518
|
-
)
|
|
519
|
-
layer_option_names = _parse_options_names(
|
|
520
|
-
_get_driver_metadata_item(driver, "DS_LAYER_CREATIONOPTIONLIST")
|
|
521
|
-
)
|
|
522
|
-
for k, v in kwargs.items():
|
|
523
|
-
if k in dataset_option_names:
|
|
524
|
-
dataset_kwargs[k] = v
|
|
525
|
-
elif k in layer_option_names:
|
|
526
|
-
layer_kwargs[k] = v
|
|
527
|
-
else:
|
|
528
|
-
raise ValueError(f"unrecognized option '{k}' for driver '{driver}'")
|
|
705
|
+
dataset_kwargs, layer_kwargs = _preprocess_options_kwargs(
|
|
706
|
+
driver, dataset_options, layer_options, kwargs
|
|
707
|
+
)
|
|
529
708
|
|
|
530
709
|
ogr_write(
|
|
531
710
|
path,
|
|
@@ -547,3 +726,146 @@ def write(
|
|
|
547
726
|
layer_kwargs=layer_kwargs,
|
|
548
727
|
gdal_tz_offsets=gdal_tz_offsets,
|
|
549
728
|
)
|
|
729
|
+
|
|
730
|
+
|
|
731
|
+
def write_arrow(
|
|
732
|
+
arrow_obj,
|
|
733
|
+
path,
|
|
734
|
+
layer=None,
|
|
735
|
+
driver=None,
|
|
736
|
+
geometry_name=None,
|
|
737
|
+
geometry_type=None,
|
|
738
|
+
crs=None,
|
|
739
|
+
encoding=None,
|
|
740
|
+
append=False,
|
|
741
|
+
dataset_metadata=None,
|
|
742
|
+
layer_metadata=None,
|
|
743
|
+
metadata=None,
|
|
744
|
+
dataset_options=None,
|
|
745
|
+
layer_options=None,
|
|
746
|
+
**kwargs,
|
|
747
|
+
):
|
|
748
|
+
"""
|
|
749
|
+
Write an Arrow-compatible data source to an OGR file format.
|
|
750
|
+
|
|
751
|
+
.. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
|
|
752
|
+
|
|
753
|
+
Parameters
|
|
754
|
+
----------
|
|
755
|
+
arrow_obj
|
|
756
|
+
The Arrow data to write. This can be any Arrow-compatible tabular data
|
|
757
|
+
object that implements the `Arrow PyCapsule Protocol`_ (i.e. has an
|
|
758
|
+
``__arrow_c_stream__`` method), for example a pyarrow Table or
|
|
759
|
+
RecordBatchReader.
|
|
760
|
+
path : str or io.BytesIO
|
|
761
|
+
path to output file on writeable file system or an io.BytesIO object to
|
|
762
|
+
allow writing to memory
|
|
763
|
+
NOTE: support for writing to memory is limited to specific drivers.
|
|
764
|
+
layer : str, optional (default: None)
|
|
765
|
+
layer name to create. If writing to memory and layer name is not
|
|
766
|
+
provided, the layer name will be set to a UUID4 value.
|
|
767
|
+
driver : string, optional (default: None)
|
|
768
|
+
The OGR format driver used to write the vector file. By default attempts
|
|
769
|
+
to infer driver from path. Must be provided to write to memory.
|
|
770
|
+
geometry_name : str, optional (default: None)
|
|
771
|
+
The name of the column in the input data that will be written as the
|
|
772
|
+
geometry field. Will be inferred from the input data if the geometry
|
|
773
|
+
column is annotated as a "geoarrow.wkb" or "ogc.wkb" extension type.
|
|
774
|
+
Otherwise needs to be specified explicitly.
|
|
775
|
+
geometry_type : str
|
|
776
|
+
The geometry type of the written layer. Currently, this needs to be
|
|
777
|
+
specified explicitly when creating a new layer with geometries.
|
|
778
|
+
Possible values are: "Unknown", "Point", "LineString", "Polygon",
|
|
779
|
+
"MultiPoint", "MultiLineString", "MultiPolygon" or "GeometryCollection".
|
|
780
|
+
|
|
781
|
+
This parameter does not modify the geometry, but it will try to force the layer
|
|
782
|
+
type of the output file to this value. Use this parameter with caution because
|
|
783
|
+
using a wrong layer geometry type may result in errors when writing the
|
|
784
|
+
file, may be ignored by the driver, or may result in invalid files.
|
|
785
|
+
encoding : str, optional (default: None)
|
|
786
|
+
Only used for the .dbf file of ESRI Shapefiles. If not specified,
|
|
787
|
+
uses the default locale.
|
|
788
|
+
append : bool, optional (default: False)
|
|
789
|
+
If True, the data source specified by path already exists, and the
|
|
790
|
+
driver supports appending to an existing data source, will cause the
|
|
791
|
+
data to be appended to the existing records in the data source. Not
|
|
792
|
+
supported for writing to in-memory files.
|
|
793
|
+
NOTE: append support is limited to specific drivers and GDAL versions.
|
|
794
|
+
dataset_metadata : dict, optional (default: None)
|
|
795
|
+
Metadata to be stored at the dataset level in the output file; limited
|
|
796
|
+
to drivers that support writing metadata, such as GPKG, and silently
|
|
797
|
+
ignored otherwise. Keys and values must be strings.
|
|
798
|
+
layer_metadata : dict, optional (default: None)
|
|
799
|
+
Metadata to be stored at the layer level in the output file; limited to
|
|
800
|
+
drivers that support writing metadata, such as GPKG, and silently
|
|
801
|
+
ignored otherwise. Keys and values must be strings.
|
|
802
|
+
metadata : dict, optional (default: None)
|
|
803
|
+
alias of layer_metadata
|
|
804
|
+
dataset_options : dict, optional
|
|
805
|
+
Dataset creation options (format specific) passed to OGR. Specify as
|
|
806
|
+
a key-value dictionary.
|
|
807
|
+
layer_options : dict, optional
|
|
808
|
+
Layer creation options (format specific) passed to OGR. Specify as
|
|
809
|
+
a key-value dictionary.
|
|
810
|
+
**kwargs
|
|
811
|
+
Additional driver-specific dataset or layer creation options passed
|
|
812
|
+
to OGR. pyogrio will attempt to automatically pass those keywords
|
|
813
|
+
either as dataset or as layer creation option based on the known
|
|
814
|
+
options for the specific driver. Alternatively, you can use the
|
|
815
|
+
explicit `dataset_options` or `layer_options` keywords to manually
|
|
816
|
+
do this (for example if an option exists as both dataset and layer
|
|
817
|
+
option).
|
|
818
|
+
|
|
819
|
+
"""
|
|
820
|
+
if not HAS_ARROW_WRITE_API:
|
|
821
|
+
raise RuntimeError("GDAL>=3.8 required to write using arrow")
|
|
822
|
+
|
|
823
|
+
if not hasattr(arrow_obj, "__arrow_c_stream__"):
|
|
824
|
+
raise ValueError(
|
|
825
|
+
"The provided data is not recognized as Arrow data. The object "
|
|
826
|
+
"should implement the Arrow PyCapsule Protocol (i.e. have a "
|
|
827
|
+
"'__arrow_c_stream__' method)."
|
|
828
|
+
)
|
|
829
|
+
|
|
830
|
+
path, driver = _get_write_path_driver(path, driver, append=append)
|
|
831
|
+
|
|
832
|
+
if "promote_to_multi" in kwargs:
|
|
833
|
+
raise ValueError(
|
|
834
|
+
"The 'promote_to_multi' option is not supported when writing using Arrow"
|
|
835
|
+
)
|
|
836
|
+
|
|
837
|
+
if geometry_name is not None:
|
|
838
|
+
if geometry_type is None:
|
|
839
|
+
raise ValueError("'geometry_type' keyword is required")
|
|
840
|
+
if crs is None:
|
|
841
|
+
# TODO: does GDAL infer CRS automatically from geometry metadata?
|
|
842
|
+
warnings.warn(
|
|
843
|
+
"'crs' was not provided. The output dataset will not have "
|
|
844
|
+
"projection information defined and may not be usable in other "
|
|
845
|
+
"systems."
|
|
846
|
+
)
|
|
847
|
+
|
|
848
|
+
dataset_metadata, layer_metadata = _validate_metadata(
|
|
849
|
+
dataset_metadata, layer_metadata, metadata
|
|
850
|
+
)
|
|
851
|
+
|
|
852
|
+
# preprocess kwargs and split in dataset and layer creation options
|
|
853
|
+
dataset_kwargs, layer_kwargs = _preprocess_options_kwargs(
|
|
854
|
+
driver, dataset_options, layer_options, kwargs
|
|
855
|
+
)
|
|
856
|
+
|
|
857
|
+
ogr_write_arrow(
|
|
858
|
+
path,
|
|
859
|
+
layer=layer,
|
|
860
|
+
driver=driver,
|
|
861
|
+
arrow_obj=arrow_obj,
|
|
862
|
+
geometry_type=geometry_type,
|
|
863
|
+
geometry_name=geometry_name,
|
|
864
|
+
crs=crs,
|
|
865
|
+
encoding=encoding,
|
|
866
|
+
append=append,
|
|
867
|
+
dataset_metadata=dataset_metadata,
|
|
868
|
+
layer_metadata=layer_metadata,
|
|
869
|
+
dataset_kwargs=dataset_kwargs,
|
|
870
|
+
layer_kwargs=layer_kwargs,
|
|
871
|
+
)
|