rashdf 0.9.0__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rashdf
3
- Version: 0.9.0
3
+ Version: 0.10.0
4
4
  Summary: Read data from HEC-RAS HDF files.
5
5
  Project-URL: repository, https://github.com/fema-ffrd/rashdf
6
6
  Classifier: Development Status :: 4 - Beta
@@ -12,7 +12,7 @@ classifiers = [
12
12
  "Programming Language :: Python :: 3.12",
13
13
  "Programming Language :: Python :: 3.13",
14
14
  ]
15
- version = "0.9.0"
15
+ version = "0.10.0"
16
16
  dependencies = ["h5py", "geopandas>=1.0,<2.0", "pyarrow", "xarray<=2025.4.0"]
17
17
 
18
18
  [project.optional-dependencies]
@@ -1779,3 +1779,151 @@ class RasPlanHdf(RasGeomHdf):
1779
1779
  """
1780
1780
  ds = self.reference_points_timeseries_output()
1781
1781
  return self._zmeta(ds)
1782
+
1783
+ def reference_lines_flow(self, use_names: bool = False) -> DataFrame:
1784
+ """Return wide-format DataFrame for reference lines timeseries flow data.
1785
+
1786
+ Parameters
1787
+ ----------
1788
+ use_names : bool, optional
1789
+ (Default) If False, use reference line IDs as column headers.
1790
+ If True, use reference line names as column headers.
1791
+
1792
+ Returns
1793
+ -------
1794
+ DataFrame
1795
+ Wide-format DataFrame with time as index and reference line IDs (or names) as columns.
1796
+ """
1797
+ ds = self.reference_lines_timeseries_output()
1798
+ return self._timeseries_to_wide_dataframe(
1799
+ ds=ds,
1800
+ var="Flow",
1801
+ id_column="refln_id",
1802
+ name_column="refln_name",
1803
+ mesh_column="mesh_name",
1804
+ use_names_as_col=use_names,
1805
+ )
1806
+
1807
+ def reference_points_stage(self, use_names: bool = False) -> DataFrame:
1808
+ """Return wide-format DataFrame for reference points timeseries stage data.
1809
+
1810
+ Parameters
1811
+ ----------
1812
+ use_names : bool, optional
1813
+ (Default) If False, use reference point IDs as column headers.
1814
+ If True, use reference point names as column headers.
1815
+
1816
+ Returns
1817
+ -------
1818
+ DataFrame
1819
+ Wide-format DataFrame with time as index and reference point IDs (or names) as columns.
1820
+ """
1821
+ ds = self.reference_points_timeseries_output()
1822
+ return self._timeseries_to_wide_dataframe(
1823
+ ds=ds,
1824
+ var=WATER_SURFACE,
1825
+ id_column="refpt_id",
1826
+ name_column="refpt_name",
1827
+ mesh_column="mesh_name",
1828
+ use_names_as_col=use_names,
1829
+ )
1830
+
1831
+ def bc_lines_flow(self, use_names: bool = False) -> DataFrame:
1832
+ """Return wide-format DataFrame for boundary condition lines timeseries flow data.
1833
+
1834
+ Parameters
1835
+ ----------
1836
+ use_names : bool, optional
1837
+ (Default) If False, use BC line IDs as column headers.
1838
+ If True, use BC line names as column headers.
1839
+
1840
+ Returns
1841
+ -------
1842
+ DataFrame
1843
+ Wide-format DataFrame with time as index and BC line IDs (or names) as columns.
1844
+ """
1845
+ ds = self.bc_lines_timeseries_output()
1846
+ return self._timeseries_to_wide_dataframe(
1847
+ ds=ds,
1848
+ var="Flow",
1849
+ id_column="bc_line_id",
1850
+ name_column="bc_line_name",
1851
+ mesh_column="mesh_name",
1852
+ use_names_as_col=use_names,
1853
+ )
1854
+
1855
+ def _timeseries_to_wide_dataframe(
1856
+ self,
1857
+ ds: xr.Dataset,
1858
+ var: str,
1859
+ id_column: str,
1860
+ name_column: str,
1861
+ mesh_column: str,
1862
+ use_names_as_col: bool = False,
1863
+ ) -> DataFrame:
1864
+ """Convert xarray timeseries Dataset to wide-format DataFrame with metadata.
1865
+
1866
+ Parameters
1867
+ ----------
1868
+ ds : xr.Dataset
1869
+ xarray Dataset containing timeseries data
1870
+ var : str
1871
+ Variable name to extract (e.g. "Flow", "Water Surface")
1872
+ id_column : str
1873
+ ID column name for pivoting (e.g. "refln_id", "refpt_id", "bc_line_id")
1874
+ name_column : str
1875
+ Name column for creating readable column names (e.g. "refln_name", "refpt_name")
1876
+ mesh_column : str
1877
+ Mesh column name (e.g. "mesh_name")
1878
+ use_names_as_col : bool, optional
1879
+ (Default) If False, use IDs as column headers.
1880
+ If True, use names as column headers.
1881
+
1882
+ Returns
1883
+ -------
1884
+ DataFrame
1885
+ Wide-format DataFrame with time as index and IDs or names as columns.
1886
+ Metadata stored in DataFrame.attrs including name and mesh mappings.
1887
+ """
1888
+ if var not in ds:
1889
+ raise ValueError(f"{var} data not found in timeseries output")
1890
+
1891
+ df = ds[var].to_dataframe().dropna().reset_index()
1892
+
1893
+ # check for duplicate names when using names as columns
1894
+ if use_names_as_col:
1895
+ unique_names = df[name_column].nunique()
1896
+ unique_ids = df[id_column].nunique()
1897
+ if unique_names < unique_ids: # should have one name for every one id
1898
+ name_counts = (
1899
+ df[[id_column, name_column]]
1900
+ .drop_duplicates()[name_column]
1901
+ .value_counts()
1902
+ )
1903
+ duplicates = name_counts[name_counts > 1].index.tolist()
1904
+ raise ValueError(
1905
+ f"Cannot use names as columns. The following names are not unique: {duplicates}. "
1906
+ )
1907
+
1908
+ pivot_column = name_column if use_names_as_col else id_column
1909
+ wide_df = df.pivot(index="time", columns=pivot_column, values=var)
1910
+
1911
+ lookup = df[[id_column, name_column, mesh_column]].drop_duplicates()
1912
+ if use_names_as_col:
1913
+ # when using names as columns, key=name -> value=id
1914
+ id_mapping = lookup.set_index(name_column)[id_column].to_dict()
1915
+ mesh_mapping = lookup.set_index(name_column)[mesh_column].to_dict()
1916
+ else:
1917
+ # when using IDs as columns, key=id -> value=name
1918
+ id_mapping = lookup.set_index(id_column)[name_column].to_dict()
1919
+ mesh_mapping = lookup.set_index(id_column)[mesh_column].to_dict()
1920
+
1921
+ wide_df.attrs = {
1922
+ "variable": var,
1923
+ "units": ds[var].attrs.get("units", None),
1924
+ "hdf_path": ds[var].attrs.get("hdf_path", None),
1925
+ "id_mapping": id_mapping,
1926
+ "mesh_mapping": mesh_mapping,
1927
+ }
1928
+
1929
+ return wide_df
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rashdf
3
- Version: 0.9.0
3
+ Version: 0.10.0
4
4
  Summary: Read data from HEC-RAS HDF files.
5
5
  Project-URL: repository, https://github.com/fema-ffrd/rashdf
6
6
  Classifier: Development Status :: 4 - Beta
@@ -748,3 +748,89 @@ def test_bc_lines_include_output_true():
748
748
  plan_hdf.bc_lines(include_output=True, datetime_to_str=True),
749
749
  bc_lines_with_output_json,
750
750
  )
751
+
752
+
753
+ def test_reference_lines_flow(tmp_path: Path):
754
+ plan_hdf = RasPlanHdf(BALD_EAGLE_P18_REF)
755
+ df = plan_hdf.reference_lines_flow()
756
+
757
+ assert df.index.name == "time"
758
+ assert df.shape == (37, 4)
759
+ assert list(df.columns) == [0, 1, 2, 3]
760
+
761
+ # Check metadata
762
+ assert df.attrs["variable"] == "Flow"
763
+ assert df.attrs["units"] == "cfs"
764
+
765
+ # Check mappings
766
+ assert "id_mapping" in df.attrs
767
+ assert len(df.attrs["id_mapping"]) == 4
768
+ assert "mesh_mapping" in df.attrs
769
+ assert len(df.attrs["mesh_mapping"]) == 4
770
+
771
+ df_refln2 = df[2].to_frame(name="Flow")
772
+ valid_df = pd.read_csv(
773
+ TEST_CSV / "BaldEagleDamBrk.reflines.2.csv",
774
+ index_col="time",
775
+ parse_dates=True,
776
+ usecols=["time", "Flow"],
777
+ dtype={"Flow": np.float32},
778
+ )
779
+ assert_frame_equal(df_refln2, valid_df, check_dtype=False)
780
+
781
+
782
+ def test_reference_points_stage(tmp_path: Path):
783
+ plan_hdf = RasPlanHdf(BALD_EAGLE_P18_REF)
784
+ df = plan_hdf.reference_points_stage()
785
+
786
+ assert df.index.name == "time"
787
+ assert df.shape == (37, 3)
788
+ assert list(df.columns) == [0, 1, 2]
789
+
790
+ # Check metadata
791
+ assert df.attrs["variable"] == "Water Surface"
792
+ assert df.attrs["units"] == "ft"
793
+
794
+ # Check mappings
795
+ assert "id_mapping" in df.attrs
796
+ assert len(df.attrs["id_mapping"]) == 3
797
+ assert "mesh_mapping" in df.attrs
798
+ assert len(df.attrs["mesh_mapping"]) == 3
799
+
800
+ df_refpt1 = df[1].to_frame(name="Water Surface")
801
+ valid_df = pd.read_csv(
802
+ TEST_CSV / "BaldEagleDamBrk.refpoints.1.csv",
803
+ index_col="time",
804
+ parse_dates=True,
805
+ usecols=["time", "Water Surface"],
806
+ dtype={"Water Surface": np.float32},
807
+ )
808
+ assert_frame_equal(df_refpt1, valid_df, check_dtype=False)
809
+
810
+
811
+ def test_bc_lines_flow(tmp_path: Path):
812
+ plan_hdf = RasPlanHdf(LOWER_KANAWHA_P01_BC_LINES)
813
+ df = plan_hdf.bc_lines_flow()
814
+
815
+ assert df.index.name == "time"
816
+ assert df.shape == (577, 10)
817
+
818
+ # Check metadata
819
+ assert df.attrs["variable"] == "Flow"
820
+ assert df.attrs["units"] == "cfs"
821
+
822
+ # Check mappings
823
+ assert "id_mapping" in df.attrs
824
+ assert len(df.attrs["id_mapping"]) == 10
825
+ assert "mesh_mapping" in df.attrs
826
+ assert len(df.attrs["mesh_mapping"]) == 10
827
+
828
+ df_bcline7 = df[7].to_frame(name="Flow")
829
+ valid_df = pd.read_csv(
830
+ TEST_CSV / "LowerKanawha.p01.bclines.7.csv",
831
+ index_col="time",
832
+ parse_dates=True,
833
+ usecols=["time", "Flow"],
834
+ dtype={"Flow": np.float32},
835
+ )
836
+ assert_frame_equal(df_bcline7, valid_df, check_dtype=False)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes