pyogrio-0.7.2-cp39-cp39-win_amd64.whl → pyogrio-0.9.0-cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (85)
  1. pyogrio/__init__.py +12 -7
  2. pyogrio/_compat.py +6 -1
  3. pyogrio/_err.c +855 -321
  4. pyogrio/_err.cp39-win_amd64.pyd +0 -0
  5. pyogrio/_err.pyx +7 -3
  6. pyogrio/_geometry.c +134 -75
  7. pyogrio/_geometry.cp39-win_amd64.pyd +0 -0
  8. pyogrio/_io.c +28462 -22609
  9. pyogrio/_io.cp39-win_amd64.pyd +0 -0
  10. pyogrio/_io.pyx +904 -242
  11. pyogrio/_ogr.c +1317 -1640
  12. pyogrio/_ogr.cp39-win_amd64.pyd +0 -0
  13. pyogrio/_ogr.pxd +69 -13
  14. pyogrio/_ogr.pyx +8 -24
  15. pyogrio/_version.py +3 -3
  16. pyogrio/_vsi.c +6815 -0
  17. pyogrio/_vsi.cp39-win_amd64.pyd +0 -0
  18. pyogrio/_vsi.pxd +4 -0
  19. pyogrio/_vsi.pyx +140 -0
  20. pyogrio/core.py +43 -44
  21. pyogrio/gdal_data/GDAL-targets-release.cmake +1 -1
  22. pyogrio/gdal_data/GDAL-targets.cmake +10 -6
  23. pyogrio/gdal_data/GDALConfigVersion.cmake +3 -3
  24. pyogrio/gdal_data/gdalinfo_output.schema.json +2 -0
  25. pyogrio/gdal_data/gdalvrt.xsd +163 -0
  26. pyogrio/gdal_data/ogrinfo_output.schema.json +12 -1
  27. pyogrio/gdal_data/vcpkg.spdx.json +23 -23
  28. pyogrio/gdal_data/vcpkg_abi_info.txt +29 -28
  29. pyogrio/geopandas.py +140 -34
  30. pyogrio/proj_data/ITRF2008 +2 -2
  31. pyogrio/proj_data/proj-config-version.cmake +2 -2
  32. pyogrio/proj_data/proj-config.cmake +2 -1
  33. pyogrio/proj_data/proj-targets-release.cmake +0 -1
  34. pyogrio/proj_data/proj-targets.cmake +10 -6
  35. pyogrio/proj_data/proj.db +0 -0
  36. pyogrio/proj_data/proj4-targets-release.cmake +0 -1
  37. pyogrio/proj_data/proj4-targets.cmake +10 -6
  38. pyogrio/proj_data/vcpkg.spdx.json +21 -43
  39. pyogrio/proj_data/vcpkg_abi_info.txt +16 -17
  40. pyogrio/raw.py +438 -116
  41. pyogrio/tests/conftest.py +75 -6
  42. pyogrio/tests/fixtures/poly_not_enough_points.shp.zip +0 -0
  43. pyogrio/tests/test_arrow.py +841 -7
  44. pyogrio/tests/test_core.py +99 -7
  45. pyogrio/tests/test_geopandas_io.py +827 -121
  46. pyogrio/tests/test_path.py +23 -3
  47. pyogrio/tests/test_raw_io.py +276 -50
  48. pyogrio/util.py +39 -19
  49. pyogrio-0.9.0.dist-info/DELVEWHEEL +2 -0
  50. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/METADATA +2 -2
  51. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/RECORD +73 -68
  52. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/WHEEL +1 -1
  53. pyogrio.libs/.load-order-pyogrio-0.9.0 +18 -0
  54. pyogrio.libs/Lerc-5e4d8cbeeabca06f95e2270792304dc3.dll +0 -0
  55. pyogrio.libs/{gdal-c3b1d8f66682071d0cd26d86e4182013.dll → gdal-b434963605a006e01c486c0df6dea4e0.dll} +0 -0
  56. pyogrio.libs/geos-f0622d0794b81c937a851b2e6fa9b712.dll +0 -0
  57. pyogrio.libs/geos_c-0e16bf70612fc3301d077b9d863a3fdb.dll +0 -0
  58. pyogrio.libs/{geotiff-e43cdab688866b59f8800cfcde836d16.dll → geotiff-772e7c705fb15ddf91b432adb4eb1f6c.dll} +0 -0
  59. pyogrio.libs/iconv-2-8fcc23ddc6f096c45871011b6e008b44.dll +0 -0
  60. pyogrio.libs/{jpeg62-567ab743ac805dfb57fe3867ba5788a4.dll → jpeg62-2f9b7af22d78338e8f0be0058503dc35.dll} +0 -0
  61. pyogrio.libs/json-c-e52a077545e4057de42beb4948289b41.dll +0 -0
  62. pyogrio.libs/libcurl-bc81cd8afe15b10c0821b181b6af8bd0.dll +0 -0
  63. pyogrio.libs/libexpat-fbe03ca8917dfda776562d4338b289b8.dll +0 -0
  64. pyogrio.libs/{liblzma-de7f4770d4e3715acd031ca93883f10c.dll → liblzma-6b36f24d54d3dd45f274a2aebef81085.dll} +0 -0
  65. pyogrio.libs/libpng16-13928571ad910705eae8d7dd8eef8b11.dll +0 -0
  66. pyogrio.libs/{msvcp140-83b6a1a2fa8b1735a358b2fe13cabe4e.dll → msvcp140-46db46e967c8db2cb7a20fc75872a57e.dll} +0 -0
  67. pyogrio.libs/proj-8a30239ef2dfc3b9dd2bb48e8abb330f.dll +0 -0
  68. pyogrio.libs/{qhull_r-99ae8a526357acc44b162cb4df2c3bb6.dll → qhull_r-c45abde5d0c92faf723cc2942138af77.dll} +0 -0
  69. pyogrio.libs/sqlite3-df30c3cf230727e23c43c40126a530f7.dll +0 -0
  70. pyogrio.libs/{tiff-7c2d4b204ec2db46c81f6a597895c2f7.dll → tiff-43630f30487a9015213475ae86ed3fa3.dll} +0 -0
  71. pyogrio.libs/{zlib1-824de9299616f0908aeeb9441a084848.dll → zlib1-e1272810861a13dd8d6cff3beac47f17.dll} +0 -0
  72. pyogrio/tests/win32.py +0 -86
  73. pyogrio-0.7.2.dist-info/DELVEWHEEL +0 -2
  74. pyogrio.libs/.load-order-pyogrio-0.7.2 +0 -17
  75. pyogrio.libs/Lerc-d5afc4101deffe7de21241ccd4d562f6.dll +0 -0
  76. pyogrio.libs/geos-1c764a1384537a0ad2995e83d23e8642.dll +0 -0
  77. pyogrio.libs/geos_c-0d7dfdcee49efa8df585e2fb993157aa.dll +0 -0
  78. pyogrio.libs/json-c-36c91e30c4410d41c22b2010c31183e3.dll +0 -0
  79. pyogrio.libs/libcurl-ebcc8c18195071a90e59f818902e10c6.dll +0 -0
  80. pyogrio.libs/libexpat-345379c9c11632130d8c383cbacde1a6.dll +0 -0
  81. pyogrio.libs/libpng16-2c30e6846653c47ef2ff9d7dec3338ba.dll +0 -0
  82. pyogrio.libs/proj-98758c96a6cb682b5cec7e8dc5e29a50.dll +0 -0
  83. pyogrio.libs/sqlite3-327ed7b38bfd91fb4a17544960e055e9.dll +0 -0
  84. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/LICENSE +0 -0
  85. {pyogrio-0.7.2.dist-info → pyogrio-0.9.0.dist-info}/top_level.txt +0 -0
pyogrio/raw.py CHANGED
@@ -1,24 +1,25 @@
+ from io import BytesIO
  import warnings

  from pyogrio._env import GDALEnv
- from pyogrio._compat import HAS_ARROW_API
+ from pyogrio._compat import HAS_ARROW_API, HAS_ARROW_WRITE_API, HAS_PYARROW
  from pyogrio.core import detect_write_driver
  from pyogrio.errors import DataSourceError
  from pyogrio.util import (
-     get_vsi_path,
-     vsi_path,
-     _preprocess_options_key_value,
      _mask_to_wkb,
+     _preprocess_options_key_value,
+     get_vsi_path_or_buffer,
+     vsi_path,
  )

  with GDALEnv():
-     from pyogrio._io import ogr_open_arrow, ogr_read, ogr_write
+     from pyogrio._io import ogr_open_arrow, ogr_read, ogr_write, ogr_write_arrow
      from pyogrio._ogr import (
+         _get_driver_metadata_item,
          get_gdal_version,
          get_gdal_version_string,
          ogr_driver_supports_write,
-         remove_virtual_file,
-         _get_driver_metadata_item,
+         ogr_driver_supports_vsi,
      )


@@ -67,13 +68,16 @@ def read(
          of the layer in the data source. Defaults to first layer in data source.
      encoding : str, optional (default: None)
          If present, will be used as the encoding for reading string values from
-         the data source, unless encoding can be inferred directly from the data
-         source.
+         the data source. By default will automatically try to detect the native
+         encoding and decode to ``UTF-8``.
      columns : list-like, optional (default: all columns)
          List of column names to import from the data source. Column names must
          exactly match the names in the data source, and will be returned in
          the order they occur in the data source. To avoid reading any columns,
-         pass an empty list-like.
+         pass an empty list-like. If combined with the ``where`` parameter, must
+         include columns referenced in the ``where`` expression or the data may
+         not be correctly read; the data source may return empty results or
+         raise an exception (behavior varies by driver).
      read_geometry : bool, optional (default: True)
          If True, will read geometry into WKB. If False, geometry will be None.
      force_2d : bool, optional (default: False)
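
The ``columns``/``where`` interaction documented above is easiest to see in a short sketch (hypothetical file and field names):

from pyogrio.raw import read

# "population" is referenced in `where`, so it is also listed in `columns`;
# omitting it may yield empty results or an error depending on the driver.
meta, fids, geometry, field_data = read(
    "places.gpkg",
    columns=["name", "population"],
    where="population > 100000",
)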
@@ -186,35 +190,28 @@ def read(
      https://www.gaia-gis.it/gaia-sins/spatialite-sql-latest.html

      """
-     path, buffer = get_vsi_path(path_or_buffer)

      dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}

-     try:
-         result = ogr_read(
-             path,
-             layer=layer,
-             encoding=encoding,
-             columns=columns,
-             read_geometry=read_geometry,
-             force_2d=force_2d,
-             skip_features=skip_features,
-             max_features=max_features or 0,
-             where=where,
-             bbox=bbox,
-             mask=_mask_to_wkb(mask),
-             fids=fids,
-             sql=sql,
-             sql_dialect=sql_dialect,
-             return_fids=return_fids,
-             dataset_kwargs=dataset_kwargs,
-             datetime_as_string=datetime_as_string,
-         )
-     finally:
-         if buffer is not None:
-             remove_virtual_file(path)
-
-     return result
+     return ogr_read(
+         get_vsi_path_or_buffer(path_or_buffer),
+         layer=layer,
+         encoding=encoding,
+         columns=columns,
+         read_geometry=read_geometry,
+         force_2d=force_2d,
+         skip_features=skip_features,
+         max_features=max_features or 0,
+         where=where,
+         bbox=bbox,
+         mask=_mask_to_wkb(mask),
+         fids=fids,
+         sql=sql,
+         sql_dialect=sql_dialect,
+         return_fids=return_fids,
+         dataset_kwargs=dataset_kwargs,
+         datetime_as_string=datetime_as_string,
+     )


  def read_arrow(
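
Cleanup of the temporary in-memory file now happens inside ``ogr_read`` via ``get_vsi_path_or_buffer``, so the ``try``/``finally`` wrapper is gone. A minimal sketch of the user-facing result (hypothetical file name):

from io import BytesIO

from pyogrio.raw import read

# bytes/BytesIO are handled directly; no manual /vsimem/ cleanup is needed
with open("countries.geojson", "rb") as f:
    buffer = BytesIO(f.read())

meta, fids, geometry, field_data = read(buffer, max_features=5)
print(meta["fields"])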
@@ -256,8 +253,16 @@ def read_arrow(
          "geometry_name": "<name of geometry column in arrow table>",
      }
      """
+     if not HAS_PYARROW:
+         raise RuntimeError(
+             "pyarrow required to read using 'read_arrow'. You can use 'open_arrow' "
+             "to read data with an alternative Arrow implementation"
+         )
+
      from pyarrow import Table

+     gdal_version = get_gdal_version()
+
      if skip_features < 0:
          raise ValueError("'skip_features' must be >= 0")

@@ -275,7 +280,7 @@ def read_arrow(

      # handle skip_features internally within open_arrow if GDAL >= 3.8.0
      gdal_skip_features = 0
-     if get_gdal_version() >= (3, 8, 0):
+     if gdal_version >= (3, 8, 0):
          gdal_skip_features = skip_features
          skip_features = 0

@@ -295,6 +300,7 @@ def read_arrow(
          return_fids=return_fids,
          skip_features=gdal_skip_features,
          batch_size=batch_size,
+         use_pyarrow=True,
          **kwargs,
      ) as source:
          meta, reader = source
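
With ``use_pyarrow=True``, ``read_arrow`` keeps returning a materialized ``pyarrow.Table``. A hedged usage sketch (requires pyarrow; ``example.gpkg`` is a hypothetical path):

from pyogrio.raw import read_arrow

meta, table = read_arrow("example.gpkg", max_features=10)  # table is a pyarrow.Table
print(meta["crs"], table.num_rows)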
@@ -349,17 +355,37 @@
      sql_dialect=None,
      return_fids=False,
      batch_size=65_536,
+     use_pyarrow=False,
      **kwargs,
  ):
      """
-     Open OGR data source as a stream of pyarrow record batches.
+     Open OGR data source as a stream of Arrow record batches.

      See docstring of `read` for parameters.

-     The RecordBatchStreamReader is reading from a stream provided by OGR and must not be
+     The returned object is reading from a stream provided by OGR and must not be
      accessed after the OGR dataset has been closed, i.e. after the context manager has
      been closed.

+     By default this function returns a generic stream object implementing
+     the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_stream__``
+     method). This object can then be consumed by your Arrow implementation
+     of choice that supports this protocol.
+     Optionally, you can specify ``use_pyarrow=True`` to directly get the
+     stream as a `pyarrow.RecordBatchReader`.
+
+     .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+     Other Parameters
+     ----------------
+     batch_size : int (default: 65_536)
+         Maximum number of features to retrieve in a batch.
+     use_pyarrow : bool (default: False)
+         If True, return a pyarrow RecordBatchReader instead of a generic
+         ArrowStream object. In the default case, this stream object needs
+         to be passed to another library supporting the Arrow PyCapsule
+         Protocol to consume the stream of data.
+
      Examples
      --------

@@ -368,16 +394,29 @@ def open_arrow(
      >>> import shapely
      >>>
      >>> with open_arrow(path) as source:
+     >>>     meta, stream = source
+     >>>     # wrap the arrow stream object in a pyarrow RecordBatchReader
+     >>>     reader = pa.RecordBatchReader.from_stream(stream)
+     >>>     for batch in reader:
+     >>>         geometries = shapely.from_wkb(batch[meta["geometry_name"] or "wkb_geometry"])
+
+     The returned `stream` object needs to be consumed by a library implementing
+     the Arrow PyCapsule Protocol. In the above example, pyarrow is used through
+     its RecordBatchReader. For this case, you can also specify ``use_pyarrow=True``
+     to directly get this result as a short-cut:
+
+     >>> with open_arrow(path, use_pyarrow=True) as source:
      >>>     meta, reader = source
-     >>>     for table in reader:
-     >>>         geometries = shapely.from_wkb(table[meta["geometry_name"]])
+     >>>     for batch in reader:
+     >>>         geometries = shapely.from_wkb(batch[meta["geometry_name"] or "wkb_geometry"])

      Returns
      -------
-     (dict, pyarrow.RecordBatchStreamReader)
+     (dict, pyarrow.RecordBatchReader or ArrowStream)

      Returns a tuple of meta information about the data source in a dict,
-     and a pyarrow RecordBatchStreamReader with data.
+     and a data stream object (a generic ArrowStream object, or a pyarrow
+     RecordBatchReader if `use_pyarrow` is set to True).

      Meta is: {
          "crs": "<crs>",
@@ -388,35 +427,30 @@
      }
      """
      if not HAS_ARROW_API:
-         raise RuntimeError("pyarrow and GDAL>= 3.6 required to read using arrow")
-
-     path, buffer = get_vsi_path(path_or_buffer)
+         raise RuntimeError("GDAL>= 3.6 required to read using arrow")

      dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {}

-     try:
-         return ogr_open_arrow(
-             path,
-             layer=layer,
-             encoding=encoding,
-             columns=columns,
-             read_geometry=read_geometry,
-             force_2d=force_2d,
-             skip_features=skip_features,
-             max_features=max_features or 0,
-             where=where,
-             bbox=bbox,
-             mask=_mask_to_wkb(mask),
-             fids=fids,
-             sql=sql,
-             sql_dialect=sql_dialect,
-             return_fids=return_fids,
-             dataset_kwargs=dataset_kwargs,
-             batch_size=batch_size,
-         )
-     finally:
-         if buffer is not None:
-             remove_virtual_file(path)
+     return ogr_open_arrow(
+         get_vsi_path_or_buffer(path_or_buffer),
+         layer=layer,
+         encoding=encoding,
+         columns=columns,
+         read_geometry=read_geometry,
+         force_2d=force_2d,
+         skip_features=skip_features,
+         max_features=max_features or 0,
+         where=where,
+         bbox=bbox,
+         mask=_mask_to_wkb(mask),
+         fids=fids,
+         sql=sql,
+         sql_dialect=sql_dialect,
+         return_fids=return_fids,
+         dataset_kwargs=dataset_kwargs,
+         batch_size=batch_size,
+         use_pyarrow=use_pyarrow,
+     )


  def _parse_options_names(xml):
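
A sketch of the default (PyCapsule) path described in the docstring above, assuming GDAL >= 3.6 and pyarrow >= 15 for ``RecordBatchReader.from_stream`` (``example.gpkg`` is a hypothetical path):

import pyarrow as pa

from pyogrio.raw import open_arrow

with open_arrow("example.gpkg", batch_size=1_000) as (meta, stream):
    # `stream` only exposes __arrow_c_stream__; hand it to pyarrow to consume
    reader = pa.RecordBatchReader.from_stream(stream)
    total = sum(batch.num_rows for batch in reader)
print(total)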
@@ -436,6 +470,112 @@ def _parse_options_names(xml):
      return options


+ def _validate_metadata(dataset_metadata, layer_metadata, metadata):
+     """Validate metadata types and resolve the ``metadata`` alias for ``layer_metadata``."""
+     if metadata is not None:
+         if layer_metadata is not None:
+             raise ValueError("Cannot pass both metadata and layer_metadata")
+         layer_metadata = metadata
+
+     # validate metadata types
+     for metadata in [dataset_metadata, layer_metadata]:
+         if metadata is not None:
+             for k, v in metadata.items():
+                 if not isinstance(k, str):
+                     raise ValueError(f"metadata key {k} must be a string")
+
+                 if not isinstance(v, str):
+                     raise ValueError(f"metadata value {v} must be a string")
+     return dataset_metadata, layer_metadata
+
+
+ def _preprocess_options_kwargs(driver, dataset_options, layer_options, kwargs):
+     """
+     Preprocess kwargs and split in dataset and layer creation options.
+     """
+
+     dataset_kwargs = _preprocess_options_key_value(dataset_options or {})
+     layer_kwargs = _preprocess_options_key_value(layer_options or {})
+     if kwargs:
+         kwargs = _preprocess_options_key_value(kwargs)
+         dataset_option_names = _parse_options_names(
+             _get_driver_metadata_item(driver, "DMD_CREATIONOPTIONLIST")
+         )
+         layer_option_names = _parse_options_names(
+             _get_driver_metadata_item(driver, "DS_LAYER_CREATIONOPTIONLIST")
+         )
+         for k, v in kwargs.items():
+             if k in dataset_option_names:
+                 dataset_kwargs[k] = v
+             elif k in layer_option_names:
+                 layer_kwargs[k] = v
+             else:
+                 raise ValueError(f"unrecognized option '{k}' for driver '{driver}'")
+
+     return dataset_kwargs, layer_kwargs
+
+
+ def _get_write_path_driver(path, driver, append=False):
+     """Validate and return path and driver
+
+     Parameters
+     ----------
+     path : str or io.BytesIO
+         path to output file on writeable file system or an io.BytesIO object to
+         allow writing to memory
+     driver : str, optional (default: None)
+         The OGR format driver used to write the vector file. By default attempts
+         to infer driver from path. Must be provided to write to a file-like
+         object.
+     append : bool, optional (default: False)
+         True if path and driver are being tested for append support
+
+     Returns
+     -------
+     (path, driver)
+     """
+
+     if isinstance(path, BytesIO):
+         if driver is None:
+             raise ValueError("driver must be provided to write to in-memory file")
+
+         # blacklist certain drivers known not to work in current memory implementation
+         # because they create multiple files
+         if driver in {"ESRI Shapefile", "OpenFileGDB"}:
+             raise ValueError(f"writing to in-memory file is not supported for {driver}")
+
+         # verify that driver supports VSI methods
+         if not ogr_driver_supports_vsi(driver):
+             raise DataSourceError(
+                 f"{driver} does not support ability to write in-memory in GDAL "
+                 f"{get_gdal_version_string()}"
+             )
+
+         if append:
+             raise NotImplementedError("append is not supported for in-memory files")
+
+     else:
+         path = vsi_path(str(path))
+
+         if driver is None:
+             driver = detect_write_driver(path)
+
+     # verify that driver supports writing
+     if not ogr_driver_supports_write(driver):
+         raise DataSourceError(
+             f"{driver} does not support write functionality in GDAL "
+             f"{get_gdal_version_string()}"
+         )
+
+     # prevent segfault from: https://github.com/OSGeo/gdal/issues/5739
+     if append and driver == "FlatGeobuf" and get_gdal_version() <= (3, 5, 0):
+         raise RuntimeError(
+             "append to FlatGeobuf is not supported for GDAL <= 3.5.0 due to segfault"
+         )
+
+     return path, driver
+
+
  def write(
      path,
      geometry,
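
Because ``_get_write_path_driver`` runs before any data handling, multi-file drivers are rejected for in-memory targets up front. An illustrative sketch of that failure mode:

from io import BytesIO

from pyogrio.raw import write

try:
    write(BytesIO(), geometry=None, field_data=[], fields=[], driver="ESRI Shapefile")
except ValueError as err:
    print(err)  # writing to in-memory file is not supported for ESRI Shapefile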
@@ -459,41 +599,94 @@ def write(
      gdal_tz_offsets=None,
      **kwargs,
  ):
+     """Write geometry and field data to an OGR file format.
+
+     Parameters
+     ----------
+     path : str or io.BytesIO
+         path to output file on writeable file system or an io.BytesIO object to
+         allow writing to memory
+         NOTE: support for writing to memory is limited to specific drivers.
+     geometry : ndarray of WKB encoded geometries or None
+         If None, geometries will not be written to output file
+     field_data : list-like of shape (num_fields, num_records)
+         contains one record per field to be written in same order as fields
+     fields : list-like
+         contains field names
+     field_mask : list-like of ndarrays or None, optional (default: None)
+         contains mask arrays indicating null values of the field at the same
+         position in the outer list, or None to indicate field does not have
+         a mask array
+     layer : str, optional (default: None)
+         layer name to create. If writing to memory and layer name is not
+         provided, the layer name will be set to a UUID4 value.
+     driver : string, optional (default: None)
+         The OGR format driver used to write the vector file. By default attempts
+         to infer driver from path. Must be provided to write to memory.
+     geometry_type : str, optional (default: None)
+         Possible values are: "Unknown", "Point", "LineString", "Polygon",
+         "MultiPoint", "MultiLineString", "MultiPolygon" or "GeometryCollection".
+
+         This parameter does not modify the geometry, but it will try to force
+         the layer type of the output file to this value. Use this parameter with
+         caution because using a wrong layer geometry type may result in errors
+         when writing the file, may be ignored by the driver, or may result in
+         invalid files.
+     crs : str, optional (default: None)
+         WKT-encoded CRS of the geometries to be written.
+     encoding : str, optional (default: None)
+         If present, will be used as the encoding for writing string values to
+         the file. Use with caution, only certain drivers support encodings
+         other than UTF-8.
+     promote_to_multi : bool, optional (default: None)
+         If True, will convert singular geometry types in the data to their
+         corresponding multi geometry type for writing. By default, will convert
+         mixed singular and multi geometry types to multi geometry types for
+         drivers that do not support mixed singular and multi geometry types. If
+         False, geometry types will not be promoted, which may result in errors
+         or invalid files when attempting to write mixed singular and multi
+         geometry types to drivers that do not support such combinations.
+     nan_as_null : bool, default True
+         For floating point columns (float32 / float64), whether NaN values are
+         written as "null" (missing value). Defaults to True because in pandas
+         NaNs are typically used as missing value. Note that when set to False,
+         behaviour is format specific: some formats don't support NaNs by
+         default (e.g. GeoJSON will skip this property) or might treat them as
+         null anyway (e.g. GeoPackage).
+     append : bool, optional (default: False)
+         If True, the data source specified by path already exists, and the
+         driver supports appending to an existing data source, will cause the
+         data to be appended to the existing records in the data source. Not
+         supported for writing to in-memory files.
+         NOTE: append support is limited to specific drivers and GDAL versions.
+     dataset_metadata : dict, optional (default: None)
+         Metadata to be stored at the dataset level in the output file; limited
+         to drivers that support writing metadata, such as GPKG, and silently
+         ignored otherwise. Keys and values must be strings.
+     layer_metadata : dict, optional (default: None)
+         Metadata to be stored at the layer level in the output file; limited to
+         drivers that support writing metadata, such as GPKG, and silently
+         ignored otherwise. Keys and values must be strings.
+     metadata : dict, optional (default: None)
+         alias of layer_metadata
+     dataset_options : dict, optional
+         Dataset creation options (format specific) passed to OGR. Specify as
+         a key-value dictionary.
+     layer_options : dict, optional
+         Layer creation options (format specific) passed to OGR. Specify as
+         a key-value dictionary.
+     gdal_tz_offsets : dict, optional (default: None)
+         Used to handle GDAL timezone offsets for each field contained in dict.
+     """
      # if dtypes is given, remove it from kwargs (dtypes is included in meta returned by
      # read, and it is convenient to pass meta directly into write for round trip tests)
      kwargs.pop("dtypes", None)
-     path = vsi_path(str(path))

-     if driver is None:
-         driver = detect_write_driver(path)
+     path, driver = _get_write_path_driver(path, driver, append=append)

-     # verify that driver supports writing
-     if not ogr_driver_supports_write(driver):
-         raise DataSourceError(
-             f"{driver} does not support write functionality in GDAL "
-             f"{get_gdal_version_string()}"
-         )
-
-     # prevent segfault from: https://github.com/OSGeo/gdal/issues/5739
-     if append and driver == "FlatGeobuf" and get_gdal_version() <= (3, 5, 0):
-         raise RuntimeError(
-             "append to FlatGeobuf is not supported for GDAL <= 3.5.0 due to segfault"
-         )
-
-     if metadata is not None:
-         if layer_metadata is not None:
-             raise ValueError("Cannot pass both metadata and layer_metadata")
-         layer_metadata = metadata
-
-     # validate metadata types
-     for metadata in [dataset_metadata, layer_metadata]:
-         if metadata is not None:
-             for k, v in metadata.items():
-                 if not isinstance(k, str):
-                     raise ValueError(f"metadata key {k} must be a string")
-
-                 if not isinstance(v, str):
-                     raise ValueError(f"metadata value {v} must be a string")
+     dataset_metadata, layer_metadata = _validate_metadata(
+         dataset_metadata, layer_metadata, metadata
+     )

      if geometry is not None and promote_to_multi is None:
          promote_to_multi = (
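
Putting the new path handling together, ``write`` can now target a ``BytesIO`` for single-file drivers. A hedged sketch (assumes numpy and shapely; passing ``crs`` as an authority string rather than WKT is assumed to be accepted by GDAL's user-input parsing):

from io import BytesIO

import numpy as np
import shapely

from pyogrio.raw import write

buffer = BytesIO()
write(
    buffer,
    geometry=np.array([shapely.Point(0, 0).wkb, shapely.Point(1, 1).wkb], dtype=object),
    field_data=[np.array(["a", "b"], dtype=object)],
    fields=["name"],
    driver="GeoJSON",
    geometry_type="Point",
    crs="EPSG:4326",
)
print(buffer.getvalue()[:60])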
@@ -509,23 +702,9 @@ def write(
      )

      # preprocess kwargs and split in dataset and layer creation options
-     dataset_kwargs = _preprocess_options_key_value(dataset_options or {})
-     layer_kwargs = _preprocess_options_key_value(layer_options or {})
-     if kwargs:
-         kwargs = _preprocess_options_key_value(kwargs)
-         dataset_option_names = _parse_options_names(
-             _get_driver_metadata_item(driver, "DMD_CREATIONOPTIONLIST")
-         )
-         layer_option_names = _parse_options_names(
-             _get_driver_metadata_item(driver, "DS_LAYER_CREATIONOPTIONLIST")
-         )
-         for k, v in kwargs.items():
-             if k in dataset_option_names:
-                 dataset_kwargs[k] = v
-             elif k in layer_option_names:
-                 layer_kwargs[k] = v
-             else:
-                 raise ValueError(f"unrecognized option '{k}' for driver '{driver}'")
+     dataset_kwargs, layer_kwargs = _preprocess_options_kwargs(
+         driver, dataset_options, layer_options, kwargs
+     )

      ogr_write(
          path,
@@ -547,3 +726,146 @@ def write(
          layer_kwargs=layer_kwargs,
          gdal_tz_offsets=gdal_tz_offsets,
      )
+
+
+ def write_arrow(
+     arrow_obj,
+     path,
+     layer=None,
+     driver=None,
+     geometry_name=None,
+     geometry_type=None,
+     crs=None,
+     encoding=None,
+     append=False,
+     dataset_metadata=None,
+     layer_metadata=None,
+     metadata=None,
+     dataset_options=None,
+     layer_options=None,
+     **kwargs,
+ ):
+     """
+     Write an Arrow-compatible data source to an OGR file format.
+
+     .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
+
+     Parameters
+     ----------
+     arrow_obj
+         The Arrow data to write. This can be any Arrow-compatible tabular data
+         object that implements the `Arrow PyCapsule Protocol`_ (i.e. has an
+         ``__arrow_c_stream__`` method), for example a pyarrow Table or
+         RecordBatchReader.
+     path : str or io.BytesIO
+         path to output file on writeable file system or an io.BytesIO object to
+         allow writing to memory
+         NOTE: support for writing to memory is limited to specific drivers.
+     layer : str, optional (default: None)
+         layer name to create. If writing to memory and layer name is not
+         provided, the layer name will be set to a UUID4 value.
+     driver : string, optional (default: None)
+         The OGR format driver used to write the vector file. By default attempts
+         to infer driver from path. Must be provided to write to memory.
+     geometry_name : str, optional (default: None)
+         The name of the column in the input data that will be written as the
+         geometry field. Will be inferred from the input data if the geometry
+         column is annotated as a "geoarrow.wkb" or "ogc.wkb" extension type.
+         Otherwise needs to be specified explicitly.
+     geometry_type : str
+         The geometry type of the written layer. Currently, this needs to be
+         specified explicitly when creating a new layer with geometries.
+         Possible values are: "Unknown", "Point", "LineString", "Polygon",
+         "MultiPoint", "MultiLineString", "MultiPolygon" or "GeometryCollection".
+
+         This parameter does not modify the geometry, but it will try to force the layer
+         type of the output file to this value. Use this parameter with caution because
+         using a wrong layer geometry type may result in errors when writing the
+         file, may be ignored by the driver, or may result in invalid files.
+     encoding : str, optional (default: None)
+         Only used for the .dbf file of ESRI Shapefiles. If not specified,
+         uses the default locale.
+     append : bool, optional (default: False)
+         If True, the data source specified by path already exists, and the
+         driver supports appending to an existing data source, will cause the
+         data to be appended to the existing records in the data source. Not
+         supported for writing to in-memory files.
+         NOTE: append support is limited to specific drivers and GDAL versions.
+     dataset_metadata : dict, optional (default: None)
+         Metadata to be stored at the dataset level in the output file; limited
+         to drivers that support writing metadata, such as GPKG, and silently
+         ignored otherwise. Keys and values must be strings.
+     layer_metadata : dict, optional (default: None)
+         Metadata to be stored at the layer level in the output file; limited to
+         drivers that support writing metadata, such as GPKG, and silently
+         ignored otherwise. Keys and values must be strings.
+     metadata : dict, optional (default: None)
+         alias of layer_metadata
+     dataset_options : dict, optional
+         Dataset creation options (format specific) passed to OGR. Specify as
+         a key-value dictionary.
+     layer_options : dict, optional
+         Layer creation options (format specific) passed to OGR. Specify as
+         a key-value dictionary.
+     **kwargs
+         Additional driver-specific dataset or layer creation options passed
+         to OGR. pyogrio will attempt to automatically pass those keywords
+         either as dataset or as layer creation option based on the known
+         options for the specific driver. Alternatively, you can use the
+         explicit `dataset_options` or `layer_options` keywords to manually
+         do this (for example if an option exists as both dataset and layer
+         option).
+
+     """
+     if not HAS_ARROW_WRITE_API:
+         raise RuntimeError("GDAL>=3.8 required to write using arrow")
+
+     if not hasattr(arrow_obj, "__arrow_c_stream__"):
+         raise ValueError(
+             "The provided data is not recognized as Arrow data. The object "
+             "should implement the Arrow PyCapsule Protocol (i.e. have a "
+             "'__arrow_c_stream__' method)."
+         )
+
+     path, driver = _get_write_path_driver(path, driver, append=append)
+
+     if "promote_to_multi" in kwargs:
+         raise ValueError(
+             "The 'promote_to_multi' option is not supported when writing using Arrow"
+         )
+
+     if geometry_name is not None:
+         if geometry_type is None:
+             raise ValueError("'geometry_type' keyword is required")
+         if crs is None:
+             # TODO: does GDAL infer CRS automatically from geometry metadata?
+             warnings.warn(
+                 "'crs' was not provided. The output dataset will not have "
+                 "projection information defined and may not be usable in other "
+                 "systems."
+             )
+
+     dataset_metadata, layer_metadata = _validate_metadata(
+         dataset_metadata, layer_metadata, metadata
+     )
+
+     # preprocess kwargs and split in dataset and layer creation options
+     dataset_kwargs, layer_kwargs = _preprocess_options_kwargs(
+         driver, dataset_options, layer_options, kwargs
+     )
+
+     ogr_write_arrow(
+         path,
+         layer=layer,
+         driver=driver,
+         arrow_obj=arrow_obj,
+         geometry_type=geometry_type,
+         geometry_name=geometry_name,
+         crs=crs,
+         encoding=encoding,
+         append=append,
+         dataset_metadata=dataset_metadata,
+         layer_metadata=layer_metadata,
+         dataset_kwargs=dataset_kwargs,
+         layer_kwargs=layer_kwargs,
+     )
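
A usage sketch for the new ``write_arrow`` (assumes GDAL >= 3.8, pyarrow, and shapely; the geometry column below is plain binary, so ``geometry_name`` must be given explicitly per the docstring above):

import pyarrow as pa
import shapely

from pyogrio.raw import write_arrow

table = pa.table(
    {
        "name": ["a", "b"],
        "geometry": [shapely.Point(0, 0).wkb, shapely.Point(1, 1).wkb],
    }
)
write_arrow(
    table,
    "out.fgb",  # FlatGeobuf; any writable single-file OGR driver should work
    geometry_name="geometry",
    geometry_type="Point",
    crs="EPSG:4326",  # authority string assumed accepted; WKT also works
)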