rashdf 0.5.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: rashdf
- Version: 0.5.0
+ Version: 0.7.0
  Summary: Read data from HEC-RAS HDF files.
  Project-URL: repository, https://github.com/fema-ffrd/rashdf
  Classifier: Development Status :: 4 - Beta
@@ -22,7 +22,12 @@ Requires-Dist: pre-commit; extra == "dev"
  Requires-Dist: ruff; extra == "dev"
  Requires-Dist: pytest; extra == "dev"
  Requires-Dist: pytest-cov; extra == "dev"
- Requires-Dist: fiona; extra == "dev"
+ Requires-Dist: kerchunk; extra == "dev"
+ Requires-Dist: zarr; extra == "dev"
+ Requires-Dist: dask; extra == "dev"
+ Requires-Dist: fsspec; extra == "dev"
+ Requires-Dist: s3fs; extra == "dev"
+ Requires-Dist: fiona==1.9.6; extra == "dev"
  Provides-Extra: docs
  Requires-Dist: sphinx; extra == "docs"
  Requires-Dist: numpydoc; extra == "docs"
@@ -140,8 +145,12 @@ $ python -m venv venv-rashdf

  Activate the virtual environment:
  ```
- $ source ./venv/bin/activate
+ # For macOS/Linux
+ $ source ./venv-rashdf/bin/activate
  (venv-rashdf) $
+
+ # For Windows
+ > ./venv-rashdf/Scripts/activate
  ```

  Install dev dependencies:
@@ -110,8 +110,12 @@ $ python -m venv venv-rashdf

  Activate the virtual environment:
  ```
- $ source ./venv/bin/activate
+ # For macOS/Linux
+ $ source ./venv-rashdf/bin/activate
  (venv-rashdf) $
+
+ # For Windows
+ > ./venv-rashdf/Scripts/activate
  ```

  Install dev dependencies:
@@ -12,11 +12,11 @@ classifiers = [
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  ]
- version = "0.5.0"
+ version = "0.7.0"
  dependencies = ["h5py", "geopandas>=1.0,<2.0", "pyarrow", "xarray"]

  [project.optional-dependencies]
- dev = ["pre-commit", "ruff", "pytest", "pytest-cov", "fiona"]
+ dev = ["pre-commit", "ruff", "pytest", "pytest-cov", "kerchunk", "zarr", "dask", "fsspec", "s3fs", "fiona==1.9.6"]
  docs = ["sphinx", "numpydoc", "sphinx_rtd_theme"]

  [project.urls]
@@ -19,6 +19,7 @@ class RasHdf(h5py.File):
  Additional keyword arguments to pass to h5py.File
  """
  super().__init__(name, mode="r", **kwargs)
+ self._loc = name

  @classmethod
  def open_uri(
@@ -49,7 +50,9 @@ class RasHdf(h5py.File):
  import fsspec

  remote_file = fsspec.open(uri, mode="rb", **fsspec_kwargs)
- return cls(remote_file.open(), **h5py_kwargs)
+ result = cls(remote_file.open(), **h5py_kwargs)
+ result._loc = uri
+ return result

  def get_attrs(self, attr_path: str) -> Dict:
  """Convert attributes from a HEC-RAS HDF file into a Python dictionary for a given attribute path.
@@ -5,6 +5,8 @@ from .utils import (
  df_datetimes_to_str,
  ras_timesteps_to_datetimes,
  parse_ras_datetime_ms,
+ deprecated,
+ convert_ras_hdf_value,
  )

  from geopandas import GeoDataFrame
@@ -155,6 +157,7 @@ class RasPlanHdf(RasGeomHdf):
  PLAN_INFO_PATH = "Plan Data/Plan Information"
  PLAN_PARAMS_PATH = "Plan Data/Plan Parameters"
  PRECIP_PATH = "Event Conditions/Meteorology/Precipitation"
+ OBS_DATA_PATH = "Event Conditions/Observed Data"
  RESULTS_UNSTEADY_PATH = "Results/Unsteady"
  RESULTS_UNSTEADY_SUMMARY_PATH = f"{RESULTS_UNSTEADY_PATH}/Summary"
  VOLUME_ACCOUNTING_PATH = f"{RESULTS_UNSTEADY_PATH}/Volume Accounting"
@@ -165,6 +168,8 @@ class RasPlanHdf(RasGeomHdf):
  UNSTEADY_TIME_SERIES_PATH = f"{BASE_OUTPUT_PATH}/Unsteady Time Series"
  REFERENCE_LINES_OUTPUT_PATH = f"{UNSTEADY_TIME_SERIES_PATH}/Reference Lines"
  REFERENCE_POINTS_OUTPUT_PATH = f"{UNSTEADY_TIME_SERIES_PATH}/Reference Points"
+ OBS_FLOW_OUTPUT_PATH = f"{OBS_DATA_PATH}/Flow"
+ OBS_STAGE_OUTPUT_PATH = f"{OBS_DATA_PATH}/Stage"

  RESULTS_STEADY_PATH = "Results/Steady"
  BASE_STEADY_PATH = f"{RESULTS_STEADY_PATH}/Output/Output Blocks/Base Output"
@@ -585,7 +590,8 @@ class RasPlanHdf(RasGeomHdf):
  Returns
  -------
  DataFrame
- A DataFrame with columns 'mesh_name', 'cell_id' or 'face_id', a value column, and a time column.
+ A DataFrame with columns 'mesh_name', 'cell_id' or 'face_id', a value column,
+ and a time column if the value corresponds to a specific time.
  """
  methods_with_times = {
  SummaryOutputVar.MAXIMUM_WATER_SURFACE: self.mesh_max_ws,
@@ -604,6 +610,76 @@ class RasPlanHdf(RasGeomHdf):
  df = other_methods[var]()
  return df

+ def _mesh_summary_outputs_df(
+ self,
+ cells_or_faces: str,
+ output_vars: Optional[List[SummaryOutputVar]] = None,
+ round_to: str = "0.1 s",
+ ) -> DataFrame:
+ if cells_or_faces == "cells":
+ feature_id_field = "cell_id"
+ elif cells_or_faces == "faces":
+ feature_id_field = "face_id"
+ else:
+ raise ValueError('cells_or_faces must be either "cells" or "faces".')
+ if output_vars is None:
+ summary_output_vars = self._summary_output_vars(
+ cells_or_faces=cells_or_faces
+ )
+ elif isinstance(output_vars, list):
+ summary_output_vars = []
+ for var in output_vars:
+ if not isinstance(var, SummaryOutputVar):
+ var = SummaryOutputVar(var)
+ summary_output_vars.append(var)
+ else:
+ raise ValueError(
+ "output_vars must be None or a list of SummaryOutputVar values."
+ )
+ df = self.mesh_summary_output(summary_output_vars[0], round_to=round_to)
+ for var in summary_output_vars[1:]:
+ df_var = self.mesh_summary_output(var, round_to=round_to)
+ df = df.merge(df_var, on=["mesh_name", feature_id_field], how="left")
+ return df
+
+ def mesh_cells_summary_output(self, round_to: str = "0.1 s") -> DataFrame:
+ """
+ Return a DataFrame with summary output data for each mesh cell in the model.
+
+ Parameters
+ ----------
+ round_to : str, optional
+ The time unit to round the datetimes to. Default: "0.1 s" (seconds).
+ See Pandas documentation for valid time units:
+ https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
+
+ Returns
+ -------
+ DataFrame
+ A DataFrame with columns 'mesh_name', 'cell_id', and columns for each
+ summary output variable.
+ """
+ return self._mesh_summary_outputs_df("cells", round_to=round_to)
+
+ def mesh_faces_summary_output(self, round_to: str = "0.1 s") -> DataFrame:
+ """
+ Return a DataFrame with summary output data for each mesh face in the model.
+
+ Parameters
+ ----------
+ round_to : str, optional
+ The time unit to round the datetimes to. Default: "0.1 s" (seconds).
+ See Pandas documentation for valid time units:
+ https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
+
+ Returns
+ -------
+ DataFrame
+ A DataFrame with columns 'mesh_name', 'face_id', and columns for each
+ summary output variable.
+ """
+ return self._mesh_summary_outputs_df("faces", round_to=round_to)
+
  def _summary_output_vars(
  self, cells_or_faces: Optional[str] = None
  ) -> List[SummaryOutputVar]:
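The new public methods above pivot the per-variable summary outputs into a single wide DataFrame keyed on `mesh_name` plus `cell_id` or `face_id`. A short sketch using the Bald Eagle test file from this release's test suite; the file path is illustrative:

```python
from rashdf.plan import RasPlanHdf, SummaryOutputVar

with RasPlanHdf("BaldEagleDamBrk.p18.hdf") as phdf:
    # One row per (mesh_name, cell_id); one column per summary variable
    cells_df = phdf.mesh_cells_summary_output()
    # The private helper also accepts an explicit subset of variables
    subset_df = phdf._mesh_summary_outputs_df(
        cells_or_faces="cells",
        output_vars=[
            SummaryOutputVar.MAXIMUM_WATER_SURFACE,
            SummaryOutputVar.MINIMUM_WATER_SURFACE,
        ],
    )
```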
@@ -812,7 +888,7 @@ class RasPlanHdf(RasGeomHdf):
  mesh_name: str,
  var: TimeSeriesOutputVar,
  ) -> Tuple[np.ndarray, str]:
- path = f"{self.UNSTEADY_TIME_SERIES_PATH}/2D Flow Areas/{mesh_name}/{var.value}"
+ path = self._mesh_timeseries_output_path(mesh_name, var.value)
  group = self.get(path)
  try:
  import dask.array as da
@@ -830,6 +906,7 @@ class RasPlanHdf(RasGeomHdf):
  self,
  mesh_name: str,
  var: Union[str, TimeSeriesOutputVar],
+ truncate: bool = True,
  ) -> xr.DataArray:
  """Return the time series output data for a given variable.

@@ -839,6 +916,8 @@ class RasPlanHdf(RasGeomHdf):
  The name of the 2D flow area mesh.
  var : TimeSeriesOutputVar
  The time series output variable to retrieve.
+ truncate : bool, optional
+ If True, truncate the number of cells to the listed cell count.

  Returns
  -------
@@ -856,7 +935,10 @@ class RasPlanHdf(RasGeomHdf):
  values, units = self._mesh_timeseries_output_values_units(mesh_name, var)
  if var in TIME_SERIES_OUTPUT_VARS_CELLS:
  cell_count = mesh_names_counts[mesh_name]
- values = values[:, :cell_count]
+ if truncate:
+ values = values[:, :cell_count]
+ else:
+ values = values[:, :]
  id_coord = "cell_id"
  elif var in TIME_SERIES_OUTPUT_VARS_FACES:
  id_coord = "face_id"
@@ -874,24 +956,28 @@ class RasPlanHdf(RasGeomHdf):
  "mesh_name": mesh_name,
  "variable": var.value,
  "units": units,
+ "hdf_path": self._mesh_timeseries_output_path(mesh_name, var.value),
  },
  )
  return da

+ def _mesh_timeseries_output_path(self, mesh_name: str, var_name: str) -> str:
+ return f"{self.UNSTEADY_TIME_SERIES_PATH}/2D Flow Areas/{mesh_name}/{var_name}"
+
  def _mesh_timeseries_outputs(
- self, mesh_name: str, vars: List[TimeSeriesOutputVar]
+ self, mesh_name: str, vars: List[TimeSeriesOutputVar], truncate: bool = True
  ) -> xr.Dataset:
  datasets = {}
  for var in vars:
  var_path = f"{self.UNSTEADY_TIME_SERIES_PATH}/2D Flow Areas/{mesh_name}/{var.value}"
  if self.get(var_path) is None:
  continue
- da = self.mesh_timeseries_output(mesh_name, var)
+ da = self.mesh_timeseries_output(mesh_name, var, truncate=truncate)
  datasets[var.value] = da
  ds = xr.Dataset(datasets, attrs={"mesh_name": mesh_name})
  return ds

- def mesh_timeseries_output_cells(self, mesh_name: str) -> xr.Dataset:
+ def mesh_cells_timeseries_output(self, mesh_name: str) -> xr.Dataset:
  """Return the time series output data for cells in a 2D flow area mesh.

  Parameters
@@ -907,7 +993,25 @@ class RasPlanHdf(RasGeomHdf):
  ds = self._mesh_timeseries_outputs(mesh_name, TIME_SERIES_OUTPUT_VARS_CELLS)
  return ds

- def mesh_timeseries_output_faces(self, mesh_name: str) -> xr.Dataset:
+ @deprecated
+ def mesh_timeseries_output_cells(self, mesh_name: str) -> xr.Dataset:
+ """Return the time series output data for cells in a 2D flow area mesh.
+
+ Deprecated: use mesh_cells_timeseries_output instead.
+
+ Parameters
+ ----------
+ mesh_name : str
+ The name of the 2D flow area mesh.
+
+ Returns
+ -------
+ xr.Dataset
+ An xarray Dataset with DataArrays for each time series output variable.
+ """
+ return self.mesh_cells_timeseries_output(mesh_name)
+
+ def mesh_faces_timeseries_output(self, mesh_name: str) -> xr.Dataset:
  """Return the time series output data for faces in a 2D flow area mesh.

  Parameters
@@ -923,6 +1027,24 @@ class RasPlanHdf(RasGeomHdf):
  ds = self._mesh_timeseries_outputs(mesh_name, TIME_SERIES_OUTPUT_VARS_FACES)
  return ds

+ @deprecated
+ def mesh_timeseries_output_faces(self, mesh_name: str) -> xr.Dataset:
+ """Return the time series output data for faces in a 2D flow area mesh.
+
+ Deprecated: use mesh_faces_timeseries_output instead.
+
+ Parameters
+ ----------
+ mesh_name : str
+ The name of the 2D flow area mesh.
+
+ Returns
+ -------
+ xr.Dataset
+ An xarray Dataset with DataArrays for each time series output variable.
+ """
+ return self.mesh_faces_timeseries_output(mesh_name)
+
  def reference_timeseries_output(self, reftype: str = "lines") -> xr.Dataset:
  """Return timeseries output data for reference lines or points from a HEC-RAS HDF plan file.

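The renames above move to a noun-first pattern (`mesh_cells_timeseries_output`), keeping the old spellings as `@deprecated` wrappers. A sketch of what callers see during the transition; the file path is illustrative:

```python
import warnings

from rashdf.plan import RasPlanHdf

with RasPlanHdf("BaldEagleDamBrk.p18.timeseries.hdf") as phdf:
    ds = phdf.mesh_cells_timeseries_output("BaldEagleCr")  # new name
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        phdf.mesh_timeseries_output_cells("BaldEagleCr")  # old name still works
    assert issubclass(caught[0].category, DeprecationWarning)
```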
@@ -984,7 +1106,7 @@ class RasPlanHdf(RasGeomHdf):
  f"{abbrev}_name": (f"{abbrev}_id", names),
  "mesh_name": (f"{abbrev}_id", mesh_areas),
  },
- attrs={"Units": units},
+ attrs={"units": units, "hdf_path": f"{output_path}/{var}"},
  )
  das[var] = da
  return xr.Dataset(das)
@@ -999,6 +1121,74 @@ class RasPlanHdf(RasGeomHdf):
  """
  return self.reference_timeseries_output(reftype="lines")

+ def observed_timeseries_input(self, vartype: str = "Flow") -> dict:
+ """Return observed timeseries input data for reference lines and points from a HEC-RAS HDF plan file.
+
+ Parameters
+ ----------
+ vartype : str, optional
+ The type of observed data to retrieve. Must be either "Flow" or "Stage".
+ (default: "Flow")
+
+ Returns
+ -------
+ xr.Dataset
+ An xarray Dataset with observed timeseries input data for both reference lines and reference points.
+ """
+ if vartype == "Flow":
+ output_path = self.OBS_FLOW_OUTPUT_PATH
+ elif vartype == "Stage":
+ output_path = self.OBS_STAGE_OUTPUT_PATH
+ else:
+ raise ValueError('vartype must be either "Flow" or "Stage".')
+
+ observed_group = self.get(output_path)
+ if observed_group is None:
+ raise RasPlanHdfError(
+ f"Could not find HDF group at path '{output_path}'."
+ f" Does the Plan HDF file contain reference {vartype} output data?"
+ )
+ if "Attributes" in observed_group.keys():
+ attr_path = observed_group["Attributes"]
+ attrs_df = pd.DataFrame(attr_path[:]).map(convert_ras_hdf_value)
+
+ das = {}
+ for idx, site in enumerate(observed_group.keys()):
+ if site != "Attributes":
+ # Example site: 'Ref Point: Grapevine_Lake_RP'
+ site_path = observed_group[site]
+ site_name = site.split(":")[1][1:]  # Grapevine_Lake_RP
+ ref_type = site.split(":")[0]  # Ref Point
+ if ref_type == "Ref Line":
+ ref_type = "refln"
+ else:
+ ref_type = "refpt"
+ df = pd.DataFrame(site_path[:]).map(convert_ras_hdf_value)
+ # Rename Date to time
+ df = df.rename(columns={"Date": "time"})
+ # Ensure the Date index is unique
+ df = df.drop_duplicates(subset="time")
+ # Package into a 1D xarray DataArray
+ values = df["Value"].values
+ times = df["time"].values
+ da = xr.DataArray(
+ values,
+ name=vartype,
+ dims=["time"],
+ coords={
+ "time": times,
+ },
+ attrs={
+ "hdf_path": f"{output_path}/{site}",
+ },
+ )
+ # Expand dimensions to add additional coordinates
+ da = da.expand_dims({f"{ref_type}_id": [idx - 1]})
+ da = da.expand_dims({f"{ref_type}_name": [site_name]})
+ das[site_name] = da
+ das = xr.concat([das[site] for site in das.keys()], dim="time")
+ return das
+
  def reference_points_timeseries_output(self) -> xr.Dataset:
  """Return timeseries output data for reference points from a HEC-RAS HDF plan file.

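`observed_timeseries_input` builds one DataArray per `Ref Line`/`Ref Point` site and concatenates them along `time`, so selecting a single site leaves NaNs at the other sites' timestamps. A sketch against the Denton test data added in this release; the file path is illustrative:

```python
from rashdf.plan import RasPlanHdf

with RasPlanHdf("Denton.hdf") as phdf:
    flow = phdf.observed_timeseries_input(vartype="Flow")
    # Select one reference line; drop NaNs introduced by concatenation
    df = flow.sel(refln_name="Denton-Justin_RL").to_dataframe().dropna()
```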
@@ -1317,3 +1507,113 @@ class RasPlanHdf(RasGeomHdf):
  A DataFrame containing the velocity inside the cross sections
  """
  return self.steady_profile_xs_output(XsSteadyOutputVar.VELOCITY_TOTAL)
+
+ def _zmeta(self, ds: xr.Dataset) -> Dict:
+ """Given an xarray Dataset, return kerchunk-style zarr reference metadata."""
+ from kerchunk.hdf import SingleHdf5ToZarr
+ import zarr
+ import base64
+
+ encoding = {}
+ chunk_meta = {}
+
+ # Loop through each variable / DataArray in the Dataset
+ for var, da in ds.data_vars.items():
+ # The "hdf_path" attribute is the path within the HDF5 file
+ # that the DataArray was read from. This attribute is inserted
+ # by rashdf (see the "mesh_timeseries_output" method).
+ hdf_ds_path = da.attrs["hdf_path"]
+ hdf_ds = self.get(hdf_ds_path)
+ if hdf_ds is None:
+ # If we don't know where in the HDF5 file the data came from,
+ # we have to skip it, because we won't be able to generate the
+ # correct metadata for it.
+ continue
+ # Get the filters and storage info for the HDF5 dataset.
+ # We call private methods from kerchunk here because there's no
+ # nice public API for this part. This is hacky and a bit risky,
+ # because private methods are more likely to change, but short of
+ # reimplementing these functions ourselves it's the best way to
+ # get the metadata we need.
+ # TODO: raise an issue in kerchunk to expose this functionality?
+ filters = SingleHdf5ToZarr._decode_filters(None, hdf_ds)
+ encoding[var] = {"compressor": None, "filters": filters}
+ storage_info = SingleHdf5ToZarr._storage_info(None, hdf_ds)
+ # Generate chunk metadata for the DataArray
+ for key, value in storage_info.items():
+ chunk_number = ".".join([str(k) for k in key])
+ chunk_key = f"{var}/{chunk_number}"
+ chunk_meta[chunk_key] = [str(self._loc), value["offset"], value["size"]]
+ # "Write" the Dataset to a temporary in-memory zarr store (which
+ # is effectively just a Python dictionary)
+ zarr_tmp = zarr.MemoryStore()
+ # Use compute=False here because we don't _actually_ want to write
+ # the data to the zarr store, we just want to generate the metadata.
+ ds.to_zarr(zarr_tmp, mode="w", compute=False, encoding=encoding)
+ zarr_meta = {"version": 1, "refs": {}}
+ # Loop through the in-memory zarr store, decode the data to strings,
+ # and add it to the final metadata dictionary.
+ for key, value in zarr_tmp.items():
+ try:
+ value_str = value.decode("utf-8")
+ except UnicodeDecodeError:
+ value_str = "base64:" + base64.b64encode(value).decode("utf-8")
+ zarr_meta["refs"][key] = value_str
+ zarr_meta["refs"].update(chunk_meta)
+ return zarr_meta
+
+ def zmeta_mesh_cells_timeseries_output(self, mesh_name: str) -> Dict:
+ """Return kerchunk-style zarr reference metadata for mesh cells timeseries output.
+
+ Requires the 'zarr' and 'kerchunk' packages.
+
+ Returns
+ -------
+ dict
+ Dictionary of kerchunk-style zarr reference metadata.
+ """
+ ds = self._mesh_timeseries_outputs(
+ mesh_name, TIME_SERIES_OUTPUT_VARS_CELLS, truncate=False
+ )
+ return self._zmeta(ds)
+
+ def zmeta_mesh_faces_timeseries_output(self, mesh_name: str) -> Dict:
+ """Return kerchunk-style zarr reference metadata for mesh faces timeseries output.
+
+ Requires the 'zarr' and 'kerchunk' packages.
+
+ Returns
+ -------
+ dict
+ Dictionary of kerchunk-style zarr reference metadata.
+ """
+ ds = self._mesh_timeseries_outputs(
+ mesh_name, TIME_SERIES_OUTPUT_VARS_FACES, truncate=False
+ )
+ return self._zmeta(ds)
+
+ def zmeta_reference_lines_timeseries_output(self) -> Dict:
+ """Return kerchunk-style zarr reference metadata for reference lines timeseries output.
+
+ Requires the 'zarr' and 'kerchunk' packages.
+
+ Returns
+ -------
+ dict
+ Dictionary of kerchunk-style zarr reference metadata.
+ """
+ ds = self.reference_lines_timeseries_output()
+ return self._zmeta(ds)
+
+ def zmeta_reference_points_timeseries_output(self) -> Dict:
+ """Return kerchunk-style zarr reference metadata for reference points timeseries output.
+
+ Requires the 'zarr' and 'kerchunk' packages.
+
+ Returns
+ -------
+ dict
+ Dictionary of kerchunk-style zarr reference metadata.
+ """
+ ds = self.reference_points_timeseries_output()
+ return self._zmeta(ds)
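End to end, the zmeta methods let a consumer read HEC-RAS results lazily through the kerchunk reference filesystem rather than copying arrays out of HDF5. A sketch mirroring the new tests later in this diff; requires zarr, kerchunk, and fsspec, and the file paths are illustrative:

```python
import json

import xarray as xr

from rashdf.plan import RasPlanHdf

with RasPlanHdf("BaldEagleDamBrk.p18.timeseries.hdf") as phdf:
    zmeta = phdf.zmeta_mesh_cells_timeseries_output("BaldEagleCr")

with open("zmeta.json", "w") as f:
    json.dump(zmeta, f)

# Open the referenced chunks lazily; no data is duplicated.
ds = xr.open_dataset(
    "reference://",
    engine="zarr",
    backend_kwargs={
        "consolidated": False,
        "storage_options": {"fo": "zmeta.json"},
    },
)
```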
@@ -6,8 +6,8 @@ import pandas as pd

  from datetime import datetime, timedelta
  import re
- from typing import Any, List, Tuple, Union, Optional
- from shapely import LineString, Polygon, polygonize_full
+ from typing import Any, Callable, List, Tuple, Union, Optional
+ import warnings


  def parse_ras_datetime_ms(datetime_str: str) -> datetime:
@@ -308,3 +308,33 @@ def ras_timesteps_to_datetimes(
  start_time + pd.Timedelta(timestep, unit=time_unit).round(round_to)
  for timestep in timesteps.astype(np.float64)
  ]
+
+
+ def deprecated(func) -> Callable:
+ """
+ Deprecate a function.
+
+ This is a decorator which can be used to mark functions as deprecated.
+ It will result in a warning being emitted when the function is used.
+
+ Parameters
+ ----------
+ func: The function to be deprecated.
+
+ Returns
+ -------
+ The decorated function.
+ """
+
+ def new_func(*args, **kwargs):
+ warnings.warn(
+ f"{func.__name__} is deprecated and will be removed in a future version.",
+ category=DeprecationWarning,
+ stacklevel=2,
+ )
+ return func(*args, **kwargs)
+
+ new_func.__name__ = func.__name__
+ new_func.__doc__ = func.__doc__
+ new_func.__dict__.update(func.__dict__)
+ return new_func
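A quick sketch of the decorator in use; `legacy_mean` is a made-up function for illustration:

```python
import warnings

from rashdf.utils import deprecated


@deprecated
def legacy_mean(values):
    """Hypothetical old alias kept for backwards compatibility."""
    return sum(values) / len(values)


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    legacy_mean([1.0, 2.0, 3.0])
assert caught[0].category is DeprecationWarning
```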
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: rashdf
- Version: 0.5.0
+ Version: 0.7.0
  Summary: Read data from HEC-RAS HDF files.
  Project-URL: repository, https://github.com/fema-ffrd/rashdf
  Classifier: Development Status :: 4 - Beta
@@ -22,7 +22,12 @@ Requires-Dist: pre-commit; extra == "dev"
  Requires-Dist: ruff; extra == "dev"
  Requires-Dist: pytest; extra == "dev"
  Requires-Dist: pytest-cov; extra == "dev"
- Requires-Dist: fiona; extra == "dev"
+ Requires-Dist: kerchunk; extra == "dev"
+ Requires-Dist: zarr; extra == "dev"
+ Requires-Dist: dask; extra == "dev"
+ Requires-Dist: fsspec; extra == "dev"
+ Requires-Dist: s3fs; extra == "dev"
+ Requires-Dist: fiona==1.9.6; extra == "dev"
  Provides-Extra: docs
  Requires-Dist: sphinx; extra == "docs"
  Requires-Dist: numpydoc; extra == "docs"
@@ -140,8 +145,12 @@ $ python -m venv venv-rashdf

  Activate the virtual environment:
  ```
- $ source ./venv/bin/activate
+ # For macOS/Linux
+ $ source ./venv-rashdf/bin/activate
  (venv-rashdf) $
+
+ # For Windows
+ > ./venv-rashdf/Scripts/activate
  ```

  Install dev dependencies:
@@ -13,6 +13,7 @@ src/rashdf.egg-info/dependency_links.txt
  src/rashdf.egg-info/entry_points.txt
  src/rashdf.egg-info/requires.txt
  src/rashdf.egg-info/top_level.txt
+ tests/test_base.py
  tests/test_cli.py
  tests/test_geom.py
  tests/test_plan.py
@@ -8,7 +8,12 @@ pre-commit
  ruff
  pytest
  pytest-cov
- fiona
+ kerchunk
+ zarr
+ dask
+ fsspec
+ s3fs
+ fiona==1.9.6

  [docs]
  sphinx
@@ -0,0 +1,20 @@
+ from src.rashdf.base import RasHdf
+ from unittest.mock import patch
+
+
+ def test_open():
+ rasfile = "Muncie.g05.hdf"
+ rasfile_path = f"./tests/data/ras/{rasfile}"
+ hdf = RasHdf(rasfile_path)
+ assert hdf._loc == rasfile_path
+
+
+ def test_open_uri():
+ rasfile = "Muncie.g05.hdf"
+ rasfile_path = f"./tests/data/ras/{rasfile}"
+ url = f"s3://mybucket/{rasfile}"
+
+ # Mock the specific functions used by s3fs
+ with patch("s3fs.core.S3FileSystem.open", return_value=open(rasfile_path, "rb")):
+ hdf = RasHdf.open_uri(url)
+ assert hdf._loc == url
@@ -5,12 +5,15 @@ from src.rashdf.plan import (
  TimeSeriesOutputVar,
  )

+ import filecmp
+ import json
  from pathlib import Path

  import numpy as np
  import pandas as pd
  from pandas.testing import assert_frame_equal
  import pytest
+ import xarray as xr

  from . import (
  _create_hdf_with_group_attrs,
@@ -26,6 +29,7 @@ TEST_ATTRS = {"test_attribute1": "test_str1", "test_attribute2": 500}
  BALD_EAGLE_P18 = TEST_DATA / "ras/BaldEagleDamBrk.p18.hdf"
  BALD_EAGLE_P18_TIMESERIES = TEST_DATA / "ras/BaldEagleDamBrk.p18.timeseries.hdf"
  BALD_EAGLE_P18_REF = TEST_DATA / "ras/BaldEagleDamBrk.reflines-refpts.p18.hdf"
+ DENTON = TEST_DATA / "ras/Denton.hdf"
  MUNCIE_G05 = TEST_DATA / "ras/Muncie.g05.hdf"
  COAL_G01 = TEST_DATA / "ras/Coal.g01.hdf"
  BAXTER_P01 = TEST_DATA / "ras_1d/Baxter.p01.hdf"
@@ -193,9 +197,9 @@ def test_mesh_timeseries_output():
  plan_hdf.mesh_timeseries_output("BaldEagleCr", "Fake Variable")


- def test_mesh_timeseries_output_cells():
+ def test_mesh_cells_timeseries_output():
  with RasPlanHdf(BALD_EAGLE_P18_TIMESERIES) as plan_hdf:
- ds = plan_hdf.mesh_timeseries_output_cells("BaldEagleCr")
+ ds = plan_hdf.mesh_cells_timeseries_output("BaldEagleCr")
  assert "time" in ds.coords
  assert "cell_id" in ds.coords
  assert "Water Surface" in ds.variables
@@ -212,7 +216,7 @@ def test_mesh_timeseries_output_cells():
  )
  assert_frame_equal(df, valid_df)

- ds = plan_hdf.mesh_timeseries_output_cells("Upper 2D Area")
+ ds = plan_hdf.mesh_cells_timeseries_output("Upper 2D Area")
  assert "time" in ds.coords
  assert "cell_id" in ds.coords
  assert "Water Surface" in ds.variables
@@ -230,9 +234,15 @@ def test_mesh_timeseries_output_cells():
  assert_frame_equal(df, valid_df)


- def test_mesh_timeseries_output_faces():
+ def test_mesh_timeseries_output_cells():
+ with pytest.warns(DeprecationWarning):
+ with RasPlanHdf(BALD_EAGLE_P18_TIMESERIES) as plan_hdf:
+ plan_hdf.mesh_timeseries_output_cells("BaldEagleCr")
+
+
+ def test_mesh_faces_timeseries_output():
  with RasPlanHdf(BALD_EAGLE_P18_TIMESERIES) as plan_hdf:
- ds = plan_hdf.mesh_timeseries_output_faces("BaldEagleCr")
+ ds = plan_hdf.mesh_faces_timeseries_output("BaldEagleCr")
  assert "time" in ds.coords
  assert "face_id" in ds.coords
  assert "Face Velocity" in ds.variables
@@ -249,7 +259,7 @@ def test_mesh_timeseries_output_faces():
  )
  assert_frame_equal(df, valid_df)

- ds = plan_hdf.mesh_timeseries_output_faces("Upper 2D Area")
+ ds = plan_hdf.mesh_faces_timeseries_output("Upper 2D Area")
  assert "time" in ds.coords
  assert "face_id" in ds.coords
  assert "Face Velocity" in ds.variables
@@ -267,6 +277,12 @@ def test_mesh_timeseries_output_faces():
  assert_frame_equal(df, valid_df)


+ def test_mesh_timeseries_output_faces():
+ with pytest.warns(DeprecationWarning):
+ with RasPlanHdf(BALD_EAGLE_P18_TIMESERIES) as plan_hdf:
+ plan_hdf.mesh_timeseries_output_faces("BaldEagleCr")
+
+
  def test_reference_lines(tmp_path: Path):
  plan_hdf = RasPlanHdf(BALD_EAGLE_P18_REF)
  gdf = plan_hdf.reference_lines(datetime_to_str=True)
@@ -291,10 +307,10 @@ def test_reference_lines_timeseries(tmp_path: Path):

  ws = ds["Water Surface"]
  assert ws.shape == (37, 4)
- assert ws.attrs["Units"] == "ft"
+ assert ws.attrs["units"] == "ft"
  q = ds["Flow"]
  assert q.shape == (37, 4)
- assert q.attrs["Units"] == "cfs"
+ assert q.attrs["units"] == "cfs"

  df = ds.sel(refln_id=2).to_dataframe()
  valid_df = pd.read_csv(
@@ -330,9 +346,9 @@ def test_reference_points_timeseries():

  ws = ds["Water Surface"]
  assert ws.shape == (37, 3)
- assert ws.attrs["Units"] == "ft"
+ assert ws.attrs["units"] == "ft"
  v = ds["Velocity"]
- assert v.attrs["Units"] == "ft/s"
+ assert v.attrs["units"] == "ft/s"
  assert v.shape == (37, 3)

  df = ds.sel(refpt_id=1).to_dataframe()
@@ -431,3 +447,206 @@ def test_cross_sections_energy_grade():
  assert _gdf_matches_json_alt(
  phdf.cross_sections_energy_grade(), xs_energy_grade_json
  )
+
+
+ def _compare_json(json_file1, json_file2) -> bool:
+ with open(json_file1) as j1:
+ with open(json_file2) as j2:
+ return json.load(j1) == json.load(j2)
+
+
+ def test_zmeta_mesh_cells_timeseries_output(tmp_path):
+ with RasPlanHdf(BALD_EAGLE_P18_TIMESERIES) as phdf:
+ # Generate Zarr metadata
+ zmeta = phdf.zmeta_mesh_cells_timeseries_output("BaldEagleCr")
+
+ # Write the Zarr metadata to JSON
+ zmeta_test_path = tmp_path / "bald-eagle-mesh-cells-zmeta.test.json"
+ with open(zmeta_test_path, "w") as f:
+ json.dump(zmeta, f, indent=4)
+
+ # Compare to a validated JSON file
+ zmeta_valid_path = TEST_JSON / "bald-eagle-mesh-cells-zmeta.json"
+ assert _compare_json(zmeta_test_path, zmeta_valid_path)
+
+ # Verify that the Zarr metadata can be used to open a dataset
+ ds = xr.open_dataset(
+ "reference://",
+ engine="zarr",
+ backend_kwargs={
+ "consolidated": False,
+ "storage_options": {"fo": str(zmeta_test_path)},
+ },
+ )
+ assert ds["Water Surface"].shape == (37, 3947)
+ assert len(ds.coords["time"]) == 37
+ assert len(ds.coords["cell_id"]) == 3947
+ assert ds.attrs["mesh_name"] == "BaldEagleCr"
+
+
+ def test_zmeta_mesh_faces_timeseries_output(tmp_path):
+ with RasPlanHdf(BALD_EAGLE_P18_TIMESERIES) as phdf:
+ # Generate Zarr metadata
+ zmeta = phdf.zmeta_mesh_faces_timeseries_output("BaldEagleCr")
+
+ # Write the Zarr metadata to JSON
+ zmeta_test_path = tmp_path / "bald-eagle-mesh-faces-zmeta.test.json"
+ with open(zmeta_test_path, "w") as f:
+ json.dump(zmeta, f, indent=4)
+
+ # Compare to a validated JSON file
+ zmeta_valid_path = TEST_JSON / "bald-eagle-mesh-faces-zmeta.json"
+ assert _compare_json(zmeta_test_path, zmeta_valid_path)
+
+ # Verify that the Zarr metadata can be used to open a dataset
+ ds = xr.open_dataset(
+ "reference://",
+ engine="zarr",
+ backend_kwargs={
+ "consolidated": False,
+ "storage_options": {"fo": str(zmeta_test_path)},
+ },
+ )
+ assert ds["Face Velocity"].shape == (37, 7295)
+ assert len(ds.coords["time"]) == 37
+ assert len(ds.coords["face_id"]) == 7295
+ assert ds.attrs["mesh_name"] == "BaldEagleCr"
+
+
+ def test_zmeta_reference_lines_timeseries_output(tmp_path):
+ with RasPlanHdf(BALD_EAGLE_P18_REF) as phdf:
+ # Generate Zarr metadata
+ zmeta = phdf.zmeta_reference_lines_timeseries_output()
+
+ # Write the Zarr metadata to JSON
+ zmeta_test_path = tmp_path / "bald-eagle-reflines-zmeta.test.json"
+ with open(zmeta_test_path, "w") as f:
+ json.dump(zmeta, f, indent=4)
+
+ # Compare to a validated JSON file
+ zmeta_valid_path = TEST_JSON / "bald-eagle-reflines-zmeta.json"
+ assert _compare_json(zmeta_test_path, zmeta_valid_path)
+
+ # Verify that the Zarr metadata can be used to open a dataset
+ ds = xr.open_dataset(
+ "reference://",
+ engine="zarr",
+ backend_kwargs={
+ "consolidated": False,
+ "storage_options": {"fo": str(zmeta_test_path)},
+ },
+ )
+ assert ds["Flow"].shape == (37, 4)
+ assert len(ds.coords["time"]) == 37
+ assert len(ds.coords["refln_id"]) == 4
+ assert ds.attrs == {}
+
+
+ def test_zmeta_reference_points_timeseries_output(tmp_path):
+ with RasPlanHdf(BALD_EAGLE_P18_REF) as phdf:
+ # Generate Zarr metadata
+ zmeta = phdf.zmeta_reference_points_timeseries_output()
+
+ # Write the Zarr metadata to JSON
+ zmeta_test_path = tmp_path / "bald-eagle-refpoints-zmeta.test.json"
+ with open(zmeta_test_path, "w") as f:
+ json.dump(zmeta, f, indent=4)
+
+ # Compare to a validated JSON file
+ zmeta_valid_path = TEST_JSON / "bald-eagle-refpoints-zmeta.json"
+ assert _compare_json(zmeta_test_path, zmeta_valid_path)
+
+ # Verify that the Zarr metadata can be used to open a dataset
+ ds = xr.open_dataset(
+ "reference://",
+ engine="zarr",
+ backend_kwargs={
+ "consolidated": False,
+ "storage_options": {"fo": str(zmeta_test_path)},
+ },
+ )
+ assert ds["Water Surface"].shape == (37, 3)
+ assert ds["Velocity"].shape == (37, 3)
+ assert len(ds.coords["time"]) == 37
+ assert len(ds.coords["refpt_id"]) == 3
+ assert ds.attrs == {}
+
+
+ def test_mesh_cells_summary_output(tmp_path):
+ with RasPlanHdf(BALD_EAGLE_P18) as phdf:
+ df = phdf.mesh_cells_summary_output()
+ test_csv = tmp_path / "BaldEagleDamBrk.summary-cells.test.csv"
+ df.to_csv(test_csv)
+ assert filecmp.cmp(
+ test_csv,
+ TEST_CSV / "BaldEagleDamBrk.summary-cells.csv",
+ shallow=False,
+ )
+
+
+ def test_mesh_faces_summary_output(tmp_path):
+ with RasPlanHdf(BALD_EAGLE_P18) as phdf:
+ df = phdf.mesh_faces_summary_output()
+ test_csv = tmp_path / "BaldEagleDamBrk.summary-faces.test.csv"
+ df.to_csv(test_csv)
+ assert filecmp.cmp(
+ test_csv,
+ TEST_CSV / "BaldEagleDamBrk.summary-faces.csv",
+ shallow=False,
+ )
+
+
+ def test__mesh_summary_outputs_df(tmp_path):
+ with RasPlanHdf(BALD_EAGLE_P18) as phdf:
+ with pytest.raises(ValueError):
+ phdf._mesh_summary_outputs_df("neither")
+
+ with pytest.raises(ValueError):
+ phdf._mesh_summary_outputs_df(cells_or_faces="cells", output_vars="wrong")
+
+ df = phdf._mesh_summary_outputs_df(
+ cells_or_faces="cells",
+ output_vars=[
+ SummaryOutputVar.MAXIMUM_WATER_SURFACE,
+ SummaryOutputVar.MINIMUM_WATER_SURFACE,
+ ],
+ )
+ test_csv = tmp_path / "BaldEagleDamBrk.summary-cells-selectvars.test.csv"
+ df.to_csv(test_csv)
+ assert filecmp.cmp(
+ test_csv,
+ TEST_CSV / "BaldEagleDamBrk.summary-cells-selectvars.csv",
+ shallow=False,
+ )
+
+
+ def test_observed_timeseries_input_flow():
+ with RasPlanHdf(DENTON) as phdf:
+ ds = phdf.observed_timeseries_input(vartype="Flow")
+ df = ds.sel(refln_name="Denton-Justin_RL").to_dataframe().dropna().reset_index()
+ valid_df = pd.read_csv(TEST_CSV / "Denton-Justin_RL_Flow.csv")
+ valid_df["time"] = pd.to_datetime(valid_df["time"])
+ assert_frame_equal(df, valid_df)
+
+
+ def test_observed_timeseries_input_stage():
+ with RasPlanHdf(DENTON) as phdf:
+ ds = phdf.observed_timeseries_input(vartype="Stage")
+ df = (
+ ds.sel(refpt_name="Grapevine_Lake_RP").to_dataframe().dropna().reset_index()
+ )
+ valid_df = pd.read_csv(TEST_CSV / "Grapevine_Lake_RP_Stage.csv")
+ valid_df["time"] = pd.to_datetime(valid_df["time"])
+ assert_frame_equal(df, valid_df)
+
+
+ def test_observed_timeseries_input_value_error():
+ with RasPlanHdf(DENTON) as phdf:
+ with pytest.raises(ValueError):
+ phdf.observed_timeseries_input(vartype="Fake Variable")
+
+
+ def test_observed_timeseries_input_rasplanhdf_error():
+ with RasPlanHdf(BALD_EAGLE_P18) as phdf:
+ with pytest.raises(RasPlanHdfError):
+ phdf.observed_timeseries_input(vartype="Flow")