rashdf 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rashdf/base.py CHANGED
@@ -19,6 +19,7 @@ class RasHdf(h5py.File):
19
19
  Additional keyword arguments to pass to h5py.File
20
20
  """
21
21
  super().__init__(name, mode="r", **kwargs)
22
+ self._loc = name
22
23
 
23
24
  @classmethod
24
25
  def open_uri(
@@ -49,7 +50,9 @@ class RasHdf(h5py.File):
49
50
  import fsspec
50
51
 
51
52
  remote_file = fsspec.open(uri, mode="rb", **fsspec_kwargs)
52
- return cls(remote_file.open(), **h5py_kwargs)
53
+ result = cls(remote_file.open(), **h5py_kwargs)
54
+ result._loc = uri
55
+ return result
53
56
 
54
57
  def get_attrs(self, attr_path: str) -> Dict:
55
58
  """Convert attributes from a HEC-RAS HDF file into a Python dictionary for a given attribute path.
rashdf/plan.py CHANGED
@@ -5,6 +5,7 @@ from .utils import (
5
5
  df_datetimes_to_str,
6
6
  ras_timesteps_to_datetimes,
7
7
  parse_ras_datetime_ms,
8
+ deprecated,
8
9
  )
9
10
 
10
11
  from geopandas import GeoDataFrame
@@ -585,7 +586,8 @@ class RasPlanHdf(RasGeomHdf):
585
586
  Returns
586
587
  -------
587
588
  DataFrame
588
- A DataFrame with columns 'mesh_name', 'cell_id' or 'face_id', a value column, and a time column.
589
+ A DataFrame with columns 'mesh_name', 'cell_id' or 'face_id', a value column,
590
+ and a time column if the value corresponds to a specific time.
589
591
  """
590
592
  methods_with_times = {
591
593
  SummaryOutputVar.MAXIMUM_WATER_SURFACE: self.mesh_max_ws,
@@ -604,6 +606,76 @@ class RasPlanHdf(RasGeomHdf):
604
606
  df = other_methods[var]()
605
607
  return df
606
608
 
609
+ def _mesh_summary_outputs_df(
610
+ self,
611
+ cells_or_faces: str,
612
+ output_vars: Optional[List[SummaryOutputVar]] = None,
613
+ round_to: str = "0.1 s",
614
+ ) -> DataFrame:
615
+ if cells_or_faces == "cells":
616
+ feature_id_field = "cell_id"
617
+ elif cells_or_faces == "faces":
618
+ feature_id_field = "face_id"
619
+ else:
620
+ raise ValueError('cells_or_faces must be either "cells" or "faces".')
621
+ if output_vars is None:
622
+ summary_output_vars = self._summary_output_vars(
623
+ cells_or_faces=cells_or_faces
624
+ )
625
+ elif isinstance(output_vars, list):
626
+ summary_output_vars = []
627
+ for var in output_vars:
628
+ if not isinstance(var, SummaryOutputVar):
629
+ var = SummaryOutputVar(var)
630
+ summary_output_vars.append(var)
631
+ else:
632
+ raise ValueError(
633
+ "include_output must be a boolean or a list of SummaryOutputVar values."
634
+ )
635
+ df = self.mesh_summary_output(summary_output_vars[0], round_to=round_to)
636
+ for var in summary_output_vars[1:]:
637
+ df_var = self.mesh_summary_output(var, round_to=round_to)
638
+ df = df.merge(df_var, on=["mesh_name", feature_id_field], how="left")
639
+ return df
640
+
641
+ def mesh_cells_summary_output(self, round_to: str = "0.1 s") -> DataFrame:
642
+ """
643
+ Return a DataFrame with summary output data for each mesh cell in the model.
644
+
645
+ Parameters
646
+ ----------
647
+ round_to : str, optional
648
+ The time unit to round the datetimes to. Default: "0.1 s" (seconds).
649
+ See Pandas documentation for valid time units:
650
+ https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
651
+
652
+ Returns
653
+ -------
654
+ DataFrame
655
+ A DataFrame with columns 'mesh_name', 'cell_id', and columns for each
656
+ summary output variable.
657
+ """
658
+ return self._mesh_summary_outputs_df("cells", round_to=round_to)
659
+
660
+ def mesh_faces_summary_output(self, round_to: str = "0.1 s") -> DataFrame:
661
+ """
662
+ Return a DataFrame with summary output data for each mesh face in the model.
663
+
664
+ Parameters
665
+ ----------
666
+ round_to : str, optional
667
+ The time unit to round the datetimes to. Default: "0.1 s" (seconds).
668
+ See Pandas documentation for valid time units:
669
+ https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
670
+
671
+ Returns
672
+ -------
673
+ DataFrame
674
+ A DataFrame with columns 'mesh_name', 'face_id', and columns for each
675
+ summary output variable.
676
+ """
677
+ return self._mesh_summary_outputs_df("faces", round_to=round_to)
678
+
607
679
  def _summary_output_vars(
608
680
  self, cells_or_faces: Optional[str] = None
609
681
  ) -> List[SummaryOutputVar]:
@@ -812,7 +884,7 @@ class RasPlanHdf(RasGeomHdf):
812
884
  mesh_name: str,
813
885
  var: TimeSeriesOutputVar,
814
886
  ) -> Tuple[np.ndarray, str]:
815
- path = f"{self.UNSTEADY_TIME_SERIES_PATH}/2D Flow Areas/{mesh_name}/{var.value}"
887
+ path = self._mesh_timeseries_output_path(mesh_name, var.value)
816
888
  group = self.get(path)
817
889
  try:
818
890
  import dask.array as da
@@ -830,6 +902,7 @@ class RasPlanHdf(RasGeomHdf):
830
902
  self,
831
903
  mesh_name: str,
832
904
  var: Union[str, TimeSeriesOutputVar],
905
+ truncate: bool = True,
833
906
  ) -> xr.DataArray:
834
907
  """Return the time series output data for a given variable.
835
908
 
@@ -839,6 +912,8 @@ class RasPlanHdf(RasGeomHdf):
839
912
  The name of the 2D flow area mesh.
840
913
  var : TimeSeriesOutputVar
841
914
  The time series output variable to retrieve.
915
+ truncate : bool, optional
916
+ If True, truncate the number of cells to the listed cell count.
842
917
 
843
918
  Returns
844
919
  -------
@@ -856,7 +931,10 @@ class RasPlanHdf(RasGeomHdf):
856
931
  values, units = self._mesh_timeseries_output_values_units(mesh_name, var)
857
932
  if var in TIME_SERIES_OUTPUT_VARS_CELLS:
858
933
  cell_count = mesh_names_counts[mesh_name]
859
- values = values[:, :cell_count]
934
+ if truncate:
935
+ values = values[:, :cell_count]
936
+ else:
937
+ values = values[:, :]
860
938
  id_coord = "cell_id"
861
939
  elif var in TIME_SERIES_OUTPUT_VARS_FACES:
862
940
  id_coord = "face_id"
@@ -874,24 +952,28 @@ class RasPlanHdf(RasGeomHdf):
874
952
  "mesh_name": mesh_name,
875
953
  "variable": var.value,
876
954
  "units": units,
955
+ "hdf_path": self._mesh_timeseries_output_path(mesh_name, var.value),
877
956
  },
878
957
  )
879
958
  return da
880
959
 
960
+ def _mesh_timeseries_output_path(self, mesh_name: str, var_name: str) -> str:
961
+ return f"{self.UNSTEADY_TIME_SERIES_PATH}/2D Flow Areas/{mesh_name}/{var_name}"
962
+
881
963
  def _mesh_timeseries_outputs(
882
- self, mesh_name: str, vars: List[TimeSeriesOutputVar]
964
+ self, mesh_name: str, vars: List[TimeSeriesOutputVar], truncate: bool = True
883
965
  ) -> xr.Dataset:
884
966
  datasets = {}
885
967
  for var in vars:
886
968
  var_path = f"{self.UNSTEADY_TIME_SERIES_PATH}/2D Flow Areas/{mesh_name}/{var.value}"
887
969
  if self.get(var_path) is None:
888
970
  continue
889
- da = self.mesh_timeseries_output(mesh_name, var)
971
+ da = self.mesh_timeseries_output(mesh_name, var, truncate=truncate)
890
972
  datasets[var.value] = da
891
973
  ds = xr.Dataset(datasets, attrs={"mesh_name": mesh_name})
892
974
  return ds
893
975
 
894
- def mesh_timeseries_output_cells(self, mesh_name: str) -> xr.Dataset:
976
+ def mesh_cells_timeseries_output(self, mesh_name: str) -> xr.Dataset:
895
977
  """Return the time series output data for cells in a 2D flow area mesh.
896
978
 
897
979
  Parameters
@@ -907,7 +989,25 @@ class RasPlanHdf(RasGeomHdf):
907
989
  ds = self._mesh_timeseries_outputs(mesh_name, TIME_SERIES_OUTPUT_VARS_CELLS)
908
990
  return ds
909
991
 
910
- def mesh_timeseries_output_faces(self, mesh_name: str) -> xr.Dataset:
992
+ @deprecated
993
+ def mesh_timeseries_output_cells(self, mesh_name: str) -> xr.Dataset:
994
+ """Return the time series output data for cells in a 2D flow area mesh.
995
+
996
+ Deprecated: use mesh_cells_timeseries_output instead.
997
+
998
+ Parameters
999
+ ----------
1000
+ mesh_name : str
1001
+ The name of the 2D flow area mesh.
1002
+
1003
+ Returns
1004
+ -------
1005
+ xr.Dataset
1006
+ An xarray Dataset with DataArrays for each time series output variable.
1007
+ """
1008
+ return self.mesh_cells_timeseries_output(mesh_name)
1009
+
1010
+ def mesh_faces_timeseries_output(self, mesh_name: str) -> xr.Dataset:
911
1011
  """Return the time series output data for faces in a 2D flow area mesh.
912
1012
 
913
1013
  Parameters
@@ -923,6 +1023,24 @@ class RasPlanHdf(RasGeomHdf):
923
1023
  ds = self._mesh_timeseries_outputs(mesh_name, TIME_SERIES_OUTPUT_VARS_FACES)
924
1024
  return ds
925
1025
 
1026
+ @deprecated
1027
+ def mesh_timeseries_output_faces(self, mesh_name: str) -> xr.Dataset:
1028
+ """Return the time series output data for faces in a 2D flow area mesh.
1029
+
1030
+ Deprecated: use mesh_faces_timeseries_output instead.
1031
+
1032
+ Parameters
1033
+ ----------
1034
+ mesh_name : str
1035
+ The name of the 2D flow area mesh.
1036
+
1037
+ Returns
1038
+ -------
1039
+ xr.Dataset
1040
+ An xarray Dataset with DataArrays for each time series output variable.
1041
+ """
1042
+ return self.mesh_faces_timeseries_output(mesh_name)
1043
+
926
1044
  def reference_timeseries_output(self, reftype: str = "lines") -> xr.Dataset:
927
1045
  """Return timeseries output data for reference lines or points from a HEC-RAS HDF plan file.
928
1046
 
@@ -984,7 +1102,7 @@ class RasPlanHdf(RasGeomHdf):
984
1102
  f"{abbrev}_name": (f"{abbrev}_id", names),
985
1103
  "mesh_name": (f"{abbrev}_id", mesh_areas),
986
1104
  },
987
- attrs={"Units": units},
1105
+ attrs={"units": units, "hdf_path": f"{output_path}/{var}"},
988
1106
  )
989
1107
  das[var] = da
990
1108
  return xr.Dataset(das)
@@ -1317,3 +1435,113 @@ class RasPlanHdf(RasGeomHdf):
1317
1435
  A DataFrame containing the velocity inside the cross sections
1318
1436
  """
1319
1437
  return self.steady_profile_xs_output(XsSteadyOutputVar.VELOCITY_TOTAL)
1438
+
1439
+ def _zmeta(self, ds: xr.Dataset) -> Dict:
1440
+ """Given a xarray Dataset, return kerchunk-style zarr reference metadata."""
1441
+ from kerchunk.hdf import SingleHdf5ToZarr
1442
+ import zarr
1443
+ import base64
1444
+
1445
+ encoding = {}
1446
+ chunk_meta = {}
1447
+
1448
+ # Loop through each variable / DataArray in the Dataset
1449
+ for var, da in ds.data_vars.items():
1450
+ # The "hdf_path" attribute is the path within the HDF5 file
1451
+ # that the DataArray was read from. This is attribute is inserted
1452
+ # by rashdf (see "mesh_timeseries_output" method).
1453
+ hdf_ds_path = da.attrs["hdf_path"]
1454
+ hdf_ds = self.get(hdf_ds_path)
1455
+ if hdf_ds is None:
1456
+ # If we don't know where in the HDF5 the data came from, we
1457
+ # have to skip it, because we won't be able to generate the
1458
+ # correct metadata for it.
1459
+ continue
1460
+ # Get the filters and storage info for the HDF5 dataset.
1461
+ # Calling private methods from Kerchunk here because
1462
+ # there's not a nice public API for this part. This is hacky
1463
+ # and a bit risky because these private methods are more likely
1464
+ # to change, but short of reimplementing these functions ourselves
1465
+ # it's the best way to get the metadata we need.
1466
+ # TODO: raise an issue in Kerchunk to expose this functionality?
1467
+ filters = SingleHdf5ToZarr._decode_filters(None, hdf_ds)
1468
+ encoding[var] = {"compressor": None, "filters": filters}
1469
+ storage_info = SingleHdf5ToZarr._storage_info(None, hdf_ds)
1470
+ # Generate chunk metadata for the DataArray
1471
+ for key, value in storage_info.items():
1472
+ chunk_number = ".".join([str(k) for k in key])
1473
+ chunk_key = f"{var}/{chunk_number}"
1474
+ chunk_meta[chunk_key] = [str(self._loc), value["offset"], value["size"]]
1475
+ # "Write" the Dataset to a temporary in-memory zarr store (which
1476
+ # is the same a Python dictionary)
1477
+ zarr_tmp = zarr.MemoryStore()
1478
+ # Use compute=False here because we don't _actually_ want to write
1479
+ # the data to the zarr store, we just want to generate the metadata.
1480
+ ds.to_zarr(zarr_tmp, mode="w", compute=False, encoding=encoding)
1481
+ zarr_meta = {"version": 1, "refs": {}}
1482
+ # Loop through the in-memory Zarr store, decode the data to strings,
1483
+ # and add it to the final metadata dictionary.
1484
+ for key, value in zarr_tmp.items():
1485
+ try:
1486
+ value_str = value.decode("utf-8")
1487
+ except UnicodeDecodeError:
1488
+ value_str = "base64:" + base64.b64encode(value).decode("utf-8")
1489
+ zarr_meta["refs"][key] = value_str
1490
+ zarr_meta["refs"].update(chunk_meta)
1491
+ return zarr_meta
1492
+
1493
+ def zmeta_mesh_cells_timeseries_output(self, mesh_name: str) -> Dict:
1494
+ """Return kerchunk-style zarr reference metadata.
1495
+
1496
+ Requires the 'zarr' and 'kerchunk' packages.
1497
+
1498
+ Returns
1499
+ -------
1500
+ dict
1501
+ Dictionary of kerchunk-style zarr reference metadata.
1502
+ """
1503
+ ds = self._mesh_timeseries_outputs(
1504
+ mesh_name, TIME_SERIES_OUTPUT_VARS_CELLS, truncate=False
1505
+ )
1506
+ return self._zmeta(ds)
1507
+
1508
+ def zmeta_mesh_faces_timeseries_output(self, mesh_name: str) -> Dict:
1509
+ """Return kerchunk-style zarr reference metadata.
1510
+
1511
+ Requires the 'zarr' and 'kerchunk' packages.
1512
+
1513
+ Returns
1514
+ -------
1515
+ dict
1516
+ Dictionary of kerchunk-style zarr reference metadata.
1517
+ """
1518
+ ds = self._mesh_timeseries_outputs(
1519
+ mesh_name, TIME_SERIES_OUTPUT_VARS_FACES, truncate=False
1520
+ )
1521
+ return self._zmeta(ds)
1522
+
1523
+ def zmeta_reference_lines_timeseries_output(self) -> Dict:
1524
+ """Return kerchunk-style zarr reference metadata.
1525
+
1526
+ Requires the 'zarr' and 'kerchunk' packages.
1527
+
1528
+ Returns
1529
+ -------
1530
+ dict
1531
+ Dictionary of kerchunk-style zarr reference metadata.
1532
+ """
1533
+ ds = self.reference_lines_timeseries_output()
1534
+ return self._zmeta(ds)
1535
+
1536
+ def zmeta_reference_points_timeseries_output(self) -> Dict:
1537
+ """Return kerchunk-style zarr reference metadata.
1538
+
1539
+ Requires the 'zarr' and 'kerchunk' packages.
1540
+
1541
+ Returns
1542
+ -------
1543
+ dict
1544
+ Dictionary of kerchunk-style zarr reference metadata.
1545
+ """
1546
+ ds = self.reference_points_timeseries_output()
1547
+ return self._zmeta(ds)
rashdf/utils.py CHANGED
@@ -6,8 +6,8 @@ import pandas as pd
6
6
 
7
7
  from datetime import datetime, timedelta
8
8
  import re
9
- from typing import Any, List, Tuple, Union, Optional
10
- from shapely import LineString, Polygon, polygonize_full
9
+ from typing import Any, Callable, List, Tuple, Union, Optional
10
+ import warnings
11
11
 
12
12
 
13
13
  def parse_ras_datetime_ms(datetime_str: str) -> datetime:
@@ -308,3 +308,33 @@ def ras_timesteps_to_datetimes(
308
308
  start_time + pd.Timedelta(timestep, unit=time_unit).round(round_to)
309
309
  for timestep in timesteps.astype(np.float64)
310
310
  ]
311
+
312
+
313
+ def deprecated(func) -> Callable:
314
+ """
315
+ Deprecate a function.
316
+
317
+ This is a decorator which can be used to mark functions as deprecated.
318
+ It will result in a warning being emitted when the function is used.
319
+
320
+ Parameters
321
+ ----------
322
+ func: The function to be deprecated.
323
+
324
+ Returns
325
+ -------
326
+ The decorated function.
327
+ """
328
+
329
+ def new_func(*args, **kwargs):
330
+ warnings.warn(
331
+ f"{func.__name__} is deprecated and will be removed in a future version.",
332
+ category=DeprecationWarning,
333
+ stacklevel=2,
334
+ )
335
+ return func(*args, **kwargs)
336
+
337
+ new_func.__name__ = func.__name__
338
+ new_func.__doc__ = func.__doc__
339
+ new_func.__dict__.update(func.__dict__)
340
+ return new_func
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: rashdf
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: Read data from HEC-RAS HDF files.
5
5
  Project-URL: repository, https://github.com/fema-ffrd/rashdf
6
6
  Classifier: Development Status :: 4 - Beta
@@ -23,6 +23,11 @@ Requires-Dist: ruff ; extra == 'dev'
23
23
  Requires-Dist: pytest ; extra == 'dev'
24
24
  Requires-Dist: pytest-cov ; extra == 'dev'
25
25
  Requires-Dist: fiona ; extra == 'dev'
26
+ Requires-Dist: kerchunk ; extra == 'dev'
27
+ Requires-Dist: zarr ; extra == 'dev'
28
+ Requires-Dist: dask ; extra == 'dev'
29
+ Requires-Dist: fsspec ; extra == 'dev'
30
+ Requires-Dist: s3fs ; extra == 'dev'
26
31
  Provides-Extra: docs
27
32
  Requires-Dist: sphinx ; extra == 'docs'
28
33
  Requires-Dist: numpydoc ; extra == 'docs'
@@ -0,0 +1,12 @@
1
+ cli.py,sha256=yItWmCxnYLcuOpJVRpUsfv_NLS9IxLjojZB9GrxfKAU,6571
2
+ rashdf/__init__.py,sha256=XXFtJDgLPCimqAhfsFz_pTWYECJiRT0i-Kb1uflXmVU,156
3
+ rashdf/base.py,sha256=cAQJX1aeBJKb3MJ06ltpbRTUaZX5NkuxpR1J4f7FyTU,2507
4
+ rashdf/geom.py,sha256=2aTfj6mqZGP6rysflQ5L8FeItlYJsknO00sKHo-yaTw,26090
5
+ rashdf/plan.py,sha256=4kftqnZedhSWPl-5Yn3vz9Z4VifTXcUokti0s5lX1lU,56479
6
+ rashdf/utils.py,sha256=Cba6sULF0m0jg6CQass4bPm2oxTd_avoe1pRQxq082c,10896
7
+ rashdf-0.6.0.dist-info/LICENSE,sha256=L_0QaLpQVHPcglVjiaJPnOocwzP8uXevDRjUPr9DL1Y,1065
8
+ rashdf-0.6.0.dist-info/METADATA,sha256=0MarTKZArGaOTTROyz4PENscSiVy7cYwvatftl89y_Q,5920
9
+ rashdf-0.6.0.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
10
+ rashdf-0.6.0.dist-info/entry_points.txt,sha256=LHHMR1lLy4wRyscMuW1RlYDXemtPgqQhNcILz0DtStY,36
11
+ rashdf-0.6.0.dist-info/top_level.txt,sha256=SrmLb6FFTJtM_t6O1v0M0JePshiQJMHr0yYVkHL7ztk,11
12
+ rashdf-0.6.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.3.0)
2
+ Generator: setuptools (71.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,12 +0,0 @@
1
- cli.py,sha256=yItWmCxnYLcuOpJVRpUsfv_NLS9IxLjojZB9GrxfKAU,6571
2
- rashdf/__init__.py,sha256=XXFtJDgLPCimqAhfsFz_pTWYECJiRT0i-Kb1uflXmVU,156
3
- rashdf/base.py,sha256=lHYVDwFTA1qFI34QYZ55QKcp7b8CeZsmDfESdkYISbg,2432
4
- rashdf/geom.py,sha256=2aTfj6mqZGP6rysflQ5L8FeItlYJsknO00sKHo-yaTw,26090
5
- rashdf/plan.py,sha256=ggXzP4Ryx9MxMSHFrkMpFIjYCdIBufWiFPsFx5SFY6c,47426
6
- rashdf/utils.py,sha256=93arHtIT-iL9dIpbYr7esjrxv1uJabTRJSruyjvr8mw,10168
7
- rashdf-0.5.0.dist-info/LICENSE,sha256=L_0QaLpQVHPcglVjiaJPnOocwzP8uXevDRjUPr9DL1Y,1065
8
- rashdf-0.5.0.dist-info/METADATA,sha256=hnF7VT4q5-tBkwMZQvF1VfGArEQ4y1jznn1A4L0PwGs,5729
9
- rashdf-0.5.0.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
10
- rashdf-0.5.0.dist-info/entry_points.txt,sha256=LHHMR1lLy4wRyscMuW1RlYDXemtPgqQhNcILz0DtStY,36
11
- rashdf-0.5.0.dist-info/top_level.txt,sha256=SrmLb6FFTJtM_t6O1v0M0JePshiQJMHr0yYVkHL7ztk,11
12
- rashdf-0.5.0.dist-info/RECORD,,