rashdf 0.9.0__tar.gz → 0.11.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rashdf
3
- Version: 0.9.0
3
+ Version: 0.11.0
4
4
  Summary: Read data from HEC-RAS HDF files.
5
5
  Project-URL: repository, https://github.com/fema-ffrd/rashdf
6
6
  Classifier: Development Status :: 4 - Beta
@@ -17,6 +17,7 @@ Requires-Dist: h5py
17
17
  Requires-Dist: geopandas<2.0,>=1.0
18
18
  Requires-Dist: pyarrow
19
19
  Requires-Dist: xarray<=2025.4.0
20
+ Requires-Dist: pandas<3.0,>=2.0
20
21
  Provides-Extra: dev
21
22
  Requires-Dist: pre-commit; extra == "dev"
22
23
  Requires-Dist: ruff; extra == "dev"
@@ -29,6 +30,7 @@ Requires-Dist: fsspec; extra == "dev"
29
30
  Requires-Dist: s3fs; extra == "dev"
30
31
  Requires-Dist: fiona==1.9.6; extra == "dev"
31
32
  Requires-Dist: numcodecs<0.16; extra == "dev"
33
+ Requires-Dist: rioxarray; extra == "dev"
32
34
  Provides-Extra: docs
33
35
  Requires-Dist: sphinx; extra == "docs"
34
36
  Requires-Dist: numpydoc; extra == "docs"
@@ -12,8 +12,14 @@ classifiers = [
12
12
  "Programming Language :: Python :: 3.12",
13
13
  "Programming Language :: Python :: 3.13",
14
14
  ]
15
- version = "0.9.0"
16
- dependencies = ["h5py", "geopandas>=1.0,<2.0", "pyarrow", "xarray<=2025.4.0"]
15
+ version = "0.11.0"
16
+ dependencies = [
17
+ "h5py",
18
+ "geopandas>=1.0,<2.0",
19
+ "pyarrow",
20
+ "xarray<=2025.4.0",
21
+ "pandas>=2.0,<3.0"
22
+ ]
17
23
 
18
24
  [project.optional-dependencies]
19
25
  dev = [
@@ -27,7 +33,8 @@ dev = [
27
33
  "fsspec",
28
34
  "s3fs",
29
35
  "fiona==1.9.6",
30
- "numcodecs<0.16"
36
+ "numcodecs<0.16",
37
+ "rioxarray",
31
38
  ]
32
39
  docs = ["sphinx", "numpydoc", "sphinx_rtd_theme"]
33
40
 
@@ -150,13 +150,13 @@ class RasGeomHdf(RasHdf):
150
150
  ][()][:, 0]
151
151
  face_id_lists = list(
152
152
  np.vectorize(
153
- lambda cell_id,
154
- cell_face_values=cell_face_values,
155
- cell_face_info=cell_face_info: str(
156
- cell_face_values[
157
- cell_face_info[cell_id][0] : cell_face_info[cell_id][0]
158
- + cell_face_info[cell_id][1]
159
- ]
153
+ lambda cell_id, cell_face_values=cell_face_values, cell_face_info=cell_face_info: (
154
+ str(
155
+ cell_face_values[
156
+ cell_face_info[cell_id][0] : cell_face_info[cell_id][0]
157
+ + cell_face_info[cell_id][1]
158
+ ]
159
+ )
160
160
  )
161
161
  )(cell_ids)
162
162
  )
@@ -4,6 +4,7 @@ from .geom import RasGeomHdf
4
4
  from .utils import (
5
5
  df_datetimes_to_str,
6
6
  ras_timesteps_to_datetimes,
7
+ parse_ras_datetime,
7
8
  parse_ras_datetime_ms,
8
9
  deprecated,
9
10
  convert_ras_hdf_value,
@@ -18,7 +19,7 @@ import xarray as xr
18
19
 
19
20
  from datetime import datetime
20
21
  from enum import Enum
21
- from typing import Dict, List, Optional, Tuple, Union
22
+ from typing import Dict, List, Optional, Tuple, Union, Sequence
22
23
 
23
24
  # Shared constant
24
25
  WATER_SURFACE = "Water Surface"
@@ -1779,3 +1780,213 @@ class RasPlanHdf(RasGeomHdf):
1779
1780
  """
1780
1781
  ds = self.reference_points_timeseries_output()
1781
1782
  return self._zmeta(ds)
1783
+
1784
+ def reference_lines_flow(self, use_names: bool = False) -> DataFrame:
1785
+ """Return wide-format DataFrame for reference lines timeseries flow data.
1786
+
1787
+ Parameters
1788
+ ----------
1789
+ use_names : bool, optional
1790
+ (Default) If False, use reference line IDs as column headers.
1791
+ If True, use reference line names as column headers.
1792
+
1793
+ Returns
1794
+ -------
1795
+ DataFrame
1796
+ Wide-format DataFrame with time as index and reference line IDs (or names) as columns.
1797
+ """
1798
+ ds = self.reference_lines_timeseries_output()
1799
+ return self._timeseries_to_wide_dataframe(
1800
+ ds=ds,
1801
+ var="Flow",
1802
+ id_column="refln_id",
1803
+ name_column="refln_name",
1804
+ mesh_column="mesh_name",
1805
+ use_names_as_col=use_names,
1806
+ )
1807
+
1808
+ def reference_points_stage(self, use_names: bool = False) -> DataFrame:
1809
+ """Return wide-format DataFrame for reference points timeseries stage data.
1810
+
1811
+ Parameters
1812
+ ----------
1813
+ use_names : bool, optional
1814
+ (Default) If False, use reference point IDs as column headers.
1815
+ If True, use reference point names as column headers.
1816
+
1817
+ Returns
1818
+ -------
1819
+ DataFrame
1820
+ Wide-format DataFrame with time as index and reference point IDs (or names) as columns.
1821
+ """
1822
+ ds = self.reference_points_timeseries_output()
1823
+ return self._timeseries_to_wide_dataframe(
1824
+ ds=ds,
1825
+ var=WATER_SURFACE,
1826
+ id_column="refpt_id",
1827
+ name_column="refpt_name",
1828
+ mesh_column="mesh_name",
1829
+ use_names_as_col=use_names,
1830
+ )
1831
+
1832
+ def bc_lines_flow(self, use_names: bool = False) -> DataFrame:
1833
+ """Return wide-format DataFrame for boundary condition lines timeseries flow data.
1834
+
1835
+ Parameters
1836
+ ----------
1837
+ use_names : bool, optional
1838
+ (Default) If False, use BC line IDs as column headers.
1839
+ If True, use BC line names as column headers.
1840
+
1841
+ Returns
1842
+ -------
1843
+ DataFrame
1844
+ Wide-format DataFrame with time as index and BC line IDs (or names) as columns.
1845
+ """
1846
+ ds = self.bc_lines_timeseries_output()
1847
+ return self._timeseries_to_wide_dataframe(
1848
+ ds=ds,
1849
+ var="Flow",
1850
+ id_column="bc_line_id",
1851
+ name_column="bc_line_name",
1852
+ mesh_column="mesh_name",
1853
+ use_names_as_col=use_names,
1854
+ )
1855
+
1856
+ def _timeseries_to_wide_dataframe(
1857
+ self,
1858
+ ds: xr.Dataset,
1859
+ var: str,
1860
+ id_column: str,
1861
+ name_column: str,
1862
+ mesh_column: str,
1863
+ use_names_as_col: bool = False,
1864
+ ) -> DataFrame:
1865
+ """Convert xarray timeseries Dataset to wide-format DataFrame with metadata.
1866
+
1867
+ Parameters
1868
+ ----------
1869
+ ds : xr.Dataset
1870
+ xarray Dataset containing timeseries data
1871
+ var : str
1872
+ Variable name to extract (e.g. "Flow", "Water Surface")
1873
+ id_column : str
1874
+ ID column name for pivoting (e.g. "refln_id", "refpt_id", "bc_line_id")
1875
+ name_column : str
1876
+ Name column for creating readable column names (e.g. "refln_name", "refpt_name")
1877
+ mesh_column : str
1878
+ Mesh column name (e.g. "mesh_name")
1879
+ use_names_as_col : bool, optional
1880
+ (Default) If False, use IDs.
1881
+ If True, use names as column headers.
1882
+
1883
+ Returns
1884
+ -------
1885
+ DataFrame
1886
+ Wide-format DataFrame with time as index and IDs or names as columns.
1887
+ Metadata stored in DataFrame.attrs including name and mesh mappings.
1888
+ """
1889
+ if var not in ds:
1890
+ raise ValueError(f"{var} data not found in timeseries output")
1891
+
1892
+ df = ds[var].to_dataframe().dropna().reset_index()
1893
+
1894
+ # check for duplicate names when using names as columns
1895
+ if use_names_as_col:
1896
+ unique_names = df[name_column].nunique()
1897
+ unique_ids = df[id_column].nunique()
1898
+ if unique_names < unique_ids: # should have one name for every one id
1899
+ name_counts = (
1900
+ df[[id_column, name_column]]
1901
+ .drop_duplicates()[name_column]
1902
+ .value_counts()
1903
+ )
1904
+ duplicates = name_counts[name_counts > 1].index.tolist()
1905
+ raise ValueError(
1906
+ f"Cannot use names as columns. The following names are not unique: {duplicates}. "
1907
+ )
1908
+
1909
+ pivot_column = name_column if use_names_as_col else id_column
1910
+ wide_df = df.pivot(index="time", columns=pivot_column, values=var)
1911
+
1912
+ lookup = df[[id_column, name_column, mesh_column]].drop_duplicates()
1913
+ if use_names_as_col:
1914
+ # when using names as columns, key=name -> value=id
1915
+ id_mapping = lookup.set_index(name_column)[id_column].to_dict()
1916
+ mesh_mapping = lookup.set_index(name_column)[mesh_column].to_dict()
1917
+ else:
1918
+ # when using IDs as columns, key=id -> value=name
1919
+ id_mapping = lookup.set_index(id_column)[name_column].to_dict()
1920
+ mesh_mapping = lookup.set_index(id_column)[mesh_column].to_dict()
1921
+
1922
+ wide_df.attrs = {
1923
+ "variable": var,
1924
+ "units": ds[var].attrs.get("units", None),
1925
+ "hdf_path": ds[var].attrs.get("hdf_path", None),
1926
+ "id_mapping": id_mapping,
1927
+ "mesh_mapping": mesh_mapping,
1928
+ }
1929
+
1930
+ return wide_df
1931
+
1932
+ def gridded_precip(
1933
+ self,
1934
+ timestamps: Optional[Union[Sequence[datetime], pd.Series]] = None,
1935
+ precip_attrs: Optional[Dict] = None,
1936
+ ) -> xr.DataArray:
1937
+ """Return precipitation timeseries input data from a HEC-RAS HDF plan file.
1938
+
1939
+ Requires the 'rioxarray' package.
1940
+
1941
+ Parameters
1942
+ ----------
1943
+ timestamps : Optional[Union[Sequence[datetime], pd.Series]], optional
1944
+ Optional sequence of timestamps to use for the time coordinate. If None, timestamps will be read from the HDF file.
1945
+ precip_attrs : Optional[Dict], optional
1946
+ Optional dictionary of precipitation attributes. If None, attributes will be read from the HDF file.
1947
+
1948
+ Returns
1949
+ -------
1950
+ xr.DataArray
1951
+ An xarray DataArray with precipitation timeseries input data.
1952
+ """
1953
+ import rioxarray
1954
+
1955
+ precip_group = self[self.PRECIP_PATH]
1956
+ precip_values: h5py.Dataset = precip_group["Values"]
1957
+ if timestamps is None:
1958
+ ds_timestamps: h5py.Dataset = precip_group["Timestamp"]
1959
+ timestamps = pd.Series(ds_timestamps.asstr()[:]).map(parse_ras_datetime)
1960
+ if precip_attrs is None:
1961
+ precip_attrs = self.get_meteorology_precip_attrs()
1962
+ crs = precip_attrs.get("Projection")
1963
+ rows = precip_attrs.get("Raster Rows")
1964
+ cols = precip_attrs.get("Raster Cols")
1965
+ top = precip_attrs.get("Raster Top")
1966
+ left = precip_attrs.get("Raster Left")
1967
+ cell_size = precip_attrs.get("Raster Cellsize")
1968
+ if not all([rows, cols, top, left, cell_size]):
1969
+ raise RasPlanHdfError(
1970
+ "Precipitation raster metadata is missing or incomplete."
1971
+ )
1972
+
1973
+ precip_values: np.ndarray = precip_values[:]
1974
+ precip_values = precip_values.reshape(precip_values.shape[0], rows, cols)
1975
+ x_coords = left + np.arange(cols) * cell_size + cell_size / 2
1976
+ y_coords = top - np.arange(rows) * cell_size - cell_size / 2
1977
+ precip = xr.DataArray(
1978
+ precip_values,
1979
+ name="Precipitation",
1980
+ dims=["time", "y", "x"],
1981
+ coords={
1982
+ "time": timestamps,
1983
+ "y": y_coords,
1984
+ "x": x_coords,
1985
+ },
1986
+ attrs={
1987
+ "units": precip_attrs.get("Units"),
1988
+ "hdf_path": f"{self.PRECIP_PATH}/Values",
1989
+ },
1990
+ )
1991
+ precip = precip.rio.write_crs(crs)
1992
+ return precip
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rashdf
3
- Version: 0.9.0
3
+ Version: 0.11.0
4
4
  Summary: Read data from HEC-RAS HDF files.
5
5
  Project-URL: repository, https://github.com/fema-ffrd/rashdf
6
6
  Classifier: Development Status :: 4 - Beta
@@ -17,6 +17,7 @@ Requires-Dist: h5py
17
17
  Requires-Dist: geopandas<2.0,>=1.0
18
18
  Requires-Dist: pyarrow
19
19
  Requires-Dist: xarray<=2025.4.0
20
+ Requires-Dist: pandas<3.0,>=2.0
20
21
  Provides-Extra: dev
21
22
  Requires-Dist: pre-commit; extra == "dev"
22
23
  Requires-Dist: ruff; extra == "dev"
@@ -29,6 +30,7 @@ Requires-Dist: fsspec; extra == "dev"
29
30
  Requires-Dist: s3fs; extra == "dev"
30
31
  Requires-Dist: fiona==1.9.6; extra == "dev"
31
32
  Requires-Dist: numcodecs<0.16; extra == "dev"
33
+ Requires-Dist: rioxarray; extra == "dev"
32
34
  Provides-Extra: docs
33
35
  Requires-Dist: sphinx; extra == "docs"
34
36
  Requires-Dist: numpydoc; extra == "docs"
@@ -2,6 +2,7 @@ h5py
2
2
  geopandas<2.0,>=1.0
3
3
  pyarrow
4
4
  xarray<=2025.4.0
5
+ pandas<3.0,>=2.0
5
6
 
6
7
  [dev]
7
8
  pre-commit
@@ -15,6 +16,7 @@ fsspec
15
16
  s3fs
16
17
  fiona==1.9.6
17
18
  numcodecs<0.16
19
+ rioxarray
18
20
 
19
21
  [docs]
20
22
  sphinx
@@ -748,3 +748,109 @@ def test_bc_lines_include_output_true():
748
748
  plan_hdf.bc_lines(include_output=True, datetime_to_str=True),
749
749
  bc_lines_with_output_json,
750
750
  )
751
+
752
+
753
+ def test_reference_lines_flow(tmp_path: Path):
754
+ plan_hdf = RasPlanHdf(BALD_EAGLE_P18_REF)
755
+ df = plan_hdf.reference_lines_flow()
756
+
757
+ assert df.index.name == "time"
758
+ assert df.shape == (37, 4)
759
+ assert list(df.columns) == [0, 1, 2, 3]
760
+
761
+ # Check metadata
762
+ assert df.attrs["variable"] == "Flow"
763
+ assert df.attrs["units"] == "cfs"
764
+
765
+ # Check mappings
766
+ assert "id_mapping" in df.attrs
767
+ assert len(df.attrs["id_mapping"]) == 4
768
+ assert "mesh_mapping" in df.attrs
769
+ assert len(df.attrs["mesh_mapping"]) == 4
770
+
771
+ df_refln2 = df[2].to_frame(name="Flow")
772
+ valid_df = pd.read_csv(
773
+ TEST_CSV / "BaldEagleDamBrk.reflines.2.csv",
774
+ index_col="time",
775
+ parse_dates=True,
776
+ usecols=["time", "Flow"],
777
+ dtype={"Flow": np.float32},
778
+ )
779
+ assert_frame_equal(df_refln2, valid_df, check_dtype=False)
780
+
781
+
782
+ def test_reference_points_stage(tmp_path: Path):
783
+ plan_hdf = RasPlanHdf(BALD_EAGLE_P18_REF)
784
+ df = plan_hdf.reference_points_stage()
785
+
786
+ assert df.index.name == "time"
787
+ assert df.shape == (37, 3)
788
+ assert list(df.columns) == [0, 1, 2]
789
+
790
+ # Check metadata
791
+ assert df.attrs["variable"] == "Water Surface"
792
+ assert df.attrs["units"] == "ft"
793
+
794
+ # Check mappings
795
+ assert "id_mapping" in df.attrs
796
+ assert len(df.attrs["id_mapping"]) == 3
797
+ assert "mesh_mapping" in df.attrs
798
+ assert len(df.attrs["mesh_mapping"]) == 3
799
+
800
+ df_refpt1 = df[1].to_frame(name="Water Surface")
801
+ valid_df = pd.read_csv(
802
+ TEST_CSV / "BaldEagleDamBrk.refpoints.1.csv",
803
+ index_col="time",
804
+ parse_dates=True,
805
+ usecols=["time", "Water Surface"],
806
+ dtype={"Water Surface": np.float32},
807
+ )
808
+ assert_frame_equal(df_refpt1, valid_df, check_dtype=False)
809
+
810
+
811
+ def test_bc_lines_flow(tmp_path: Path):
812
+ plan_hdf = RasPlanHdf(LOWER_KANAWHA_P01_BC_LINES)
813
+ df = plan_hdf.bc_lines_flow()
814
+
815
+ assert df.index.name == "time"
816
+ assert df.shape == (577, 10)
817
+
818
+ # Check metadata
819
+ assert df.attrs["variable"] == "Flow"
820
+ assert df.attrs["units"] == "cfs"
821
+
822
+ # Check mappings
823
+ assert "id_mapping" in df.attrs
824
+ assert len(df.attrs["id_mapping"]) == 10
825
+ assert "mesh_mapping" in df.attrs
826
+ assert len(df.attrs["mesh_mapping"]) == 10
827
+
828
+ df_bcline7 = df[7].to_frame(name="Flow")
829
+ valid_df = pd.read_csv(
830
+ TEST_CSV / "LowerKanawha.p01.bclines.7.csv",
831
+ index_col="time",
832
+ parse_dates=True,
833
+ usecols=["time", "Flow"],
834
+ dtype={"Flow": np.float32},
835
+ )
836
+ assert_frame_equal(df_bcline7, valid_df, check_dtype=False)
837
+
838
+
839
+ def test_gridded_precip():
840
+ plan_hdf = RasPlanHdf(TEST_DATA / "ras/ElkMiddle.gridded-precip.p01.hdf")
841
+ precip = plan_hdf.gridded_precip()
842
+ assert precip.shape == (24, 160, 110)
843
+ assert (
844
+ precip.attrs["units"]
845
+ == plan_hdf["/Event Conditions/Meteorology/Precipitation"]
846
+ .attrs["Units"]
847
+ .decode()
848
+ )
849
+
850
+
851
+ def test_gridded_precip_bad_precip_attrs():
852
+ plan_hdf = RasPlanHdf(TEST_DATA / "ras/ElkMiddle.gridded-precip.p01.hdf")
853
+ precip_attrs = plan_hdf.get_meteorology_precip_attrs()
854
+ precip_attrs.pop("Raster Rows")
855
+ with pytest.raises(RasPlanHdfError):
856
+ plan_hdf.gridded_precip(precip_attrs=precip_attrs)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes